Toucan-Qwen2.5-14B-Instruct-v0.1 / trainer_state.json
zhangchenxu's picture
Upload folder using huggingface_hub
13e3e80 verified
{
"best_global_step": 156,
"best_metric": 0.83697891,
"best_model_checkpoint": "/proj/checkpoints/zhangchen/tool-rl-dev/sft_models/Qwen2.5-14B-Instruct-MIX-KimiK2-DD3-LR2.0e-5-EPOCHS2/v0-20250919-070250/checkpoint-156",
"epoch": 2.0,
"eval_steps": 16,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01282051282051282,
"grad_norm": 9.204224584296727,
"learning_rate": 1.9997972289848505e-05,
"loss": 1.4553444385528564,
"step": 1
},
{
"epoch": 0.0641025641025641,
"grad_norm": 1.4530528528883362,
"learning_rate": 1.9949348350626456e-05,
"loss": 1.1599386930465698,
"step": 5
},
{
"epoch": 0.1282051282051282,
"grad_norm": 0.5423824300175941,
"learning_rate": 1.979790652042268e-05,
"loss": 1.049617385864258,
"step": 10
},
{
"epoch": 0.19230769230769232,
"grad_norm": 0.4547116616967532,
"learning_rate": 1.954720866508546e-05,
"loss": 1.0327177047729492,
"step": 15
},
{
"epoch": 0.20512820512820512,
"eval_loss": 0.9275281429290771,
"eval_runtime": 3.1525,
"eval_samples_per_second": 33.624,
"eval_steps_per_second": 0.317,
"eval_token_acc": 0.7512940864412847,
"step": 16
},
{
"epoch": 0.2564102564102564,
"grad_norm": 0.39026042319054643,
"learning_rate": 1.9199794436588244e-05,
"loss": 1.0138656616210937,
"step": 20
},
{
"epoch": 0.32051282051282054,
"grad_norm": 0.31359214563669735,
"learning_rate": 1.875918325566888e-05,
"loss": 0.9882354736328125,
"step": 25
},
{
"epoch": 0.38461538461538464,
"grad_norm": 0.3448089787765916,
"learning_rate": 1.8229838658936566e-05,
"loss": 0.9615086555480957,
"step": 30
},
{
"epoch": 0.41025641025641024,
"eval_loss": 0.885535478591919,
"eval_runtime": 3.1496,
"eval_samples_per_second": 33.655,
"eval_steps_per_second": 0.318,
"eval_token_acc": 0.760162978607562,
"step": 32
},
{
"epoch": 0.44871794871794873,
"grad_norm": 0.2761120555741548,
"learning_rate": 1.761712308177359e-05,
"loss": 0.9588653564453125,
"step": 35
},
{
"epoch": 0.5128205128205128,
"grad_norm": 0.2440053485566061,
"learning_rate": 1.6927243535095995e-05,
"loss": 0.9517783164978028,
"step": 40
},
{
"epoch": 0.5769230769230769,
"grad_norm": 0.2512274854383336,
"learning_rate": 1.6167188726285433e-05,
"loss": 0.9253202438354492,
"step": 45
},
{
"epoch": 0.6153846153846154,
"eval_loss": 0.8652209043502808,
"eval_runtime": 3.1393,
"eval_samples_per_second": 33.766,
"eval_steps_per_second": 0.319,
"eval_token_acc": 0.764574489619886,
"step": 48
},
{
"epoch": 0.6410256410256411,
"grad_norm": 0.24767273865856176,
"learning_rate": 1.5344658261278013e-05,
"loss": 0.923402214050293,
"step": 50
},
{
"epoch": 0.7051282051282052,
"grad_norm": 0.22920107782733407,
"learning_rate": 1.4467984645016259e-05,
"loss": 0.9294641494750977,
"step": 55
},
{
"epoch": 0.7692307692307693,
"grad_norm": 0.2734790600557061,
"learning_rate": 1.3546048870425356e-05,
"loss": 0.9429225921630859,
"step": 60
},
{
"epoch": 0.8205128205128205,
"eval_loss": 0.8536690473556519,
"eval_runtime": 3.2912,
"eval_samples_per_second": 32.208,
"eval_steps_per_second": 0.304,
"eval_token_acc": 0.7668949204801208,
"step": 64
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.2458526724445435,
"learning_rate": 1.2588190451025209e-05,
"loss": 0.9190584182739258,
"step": 65
},
{
"epoch": 0.8974358974358975,
"grad_norm": 0.24846448980401165,
"learning_rate": 1.1604112808577603e-05,
"loss": 0.918177604675293,
"step": 70
},
{
"epoch": 0.9615384615384616,
"grad_norm": 0.22516966308872668,
"learning_rate": 1.0603784974222862e-05,
"loss": 0.896082878112793,
"step": 75
},
{
"epoch": 1.0256410256410255,
"grad_norm": 0.28109415933672915,
"learning_rate": 9.597340598905851e-06,
"loss": 0.8887803077697753,
"step": 80
},
{
"epoch": 1.0256410256410255,
"eval_loss": 0.846965491771698,
"eval_runtime": 3.1974,
"eval_samples_per_second": 33.152,
"eval_steps_per_second": 0.313,
"eval_token_acc": 0.7683886916140407,
"step": 80
},
{
"epoch": 1.0897435897435896,
"grad_norm": 0.28540387671693573,
"learning_rate": 8.594975296149076e-06,
"loss": 0.8414465904235839,
"step": 85
},
{
"epoch": 1.1538461538461537,
"grad_norm": 0.2803963137369875,
"learning_rate": 7.606843357124426e-06,
"loss": 0.8364896774291992,
"step": 90
},
{
"epoch": 1.217948717948718,
"grad_norm": 0.2416844219127749,
"learning_rate": 6.6429548843339554e-06,
"loss": 0.8242883682250977,
"step": 95
},
{
"epoch": 1.2307692307692308,
"eval_loss": 0.8430743217468262,
"eval_runtime": 3.2098,
"eval_samples_per_second": 33.024,
"eval_steps_per_second": 0.312,
"eval_token_acc": 0.7694875806591539,
"step": 96
},
{
"epoch": 1.282051282051282,
"grad_norm": 0.2515386758810601,
"learning_rate": 5.713074385969457e-06,
"loss": 0.835479736328125,
"step": 100
},
{
"epoch": 1.3461538461538463,
"grad_norm": 0.23492463105327158,
"learning_rate": 4.826621858223431e-06,
"loss": 0.8471467018127441,
"step": 105
},
{
"epoch": 1.4102564102564101,
"grad_norm": 0.2990830422111698,
"learning_rate": 3.99257735762021e-06,
"loss": 0.8314805030822754,
"step": 110
},
{
"epoch": 1.435897435897436,
"eval_loss": 0.8396947979927063,
"eval_runtime": 3.1689,
"eval_samples_per_second": 33.451,
"eval_steps_per_second": 0.316,
"eval_token_acc": 0.7704079750227107,
"step": 112
},
{
"epoch": 1.4743589743589745,
"grad_norm": 0.21721857976027215,
"learning_rate": 3.2193900300810908e-06,
"loss": 0.8310983657836915,
"step": 115
},
{
"epoch": 1.5384615384615383,
"grad_norm": 0.21954724710062298,
"learning_rate": 2.514892518288988e-06,
"loss": 0.8174694061279297,
"step": 120
},
{
"epoch": 1.6025641025641026,
"grad_norm": 0.2130942259472865,
"learning_rate": 1.8862216144342692e-06,
"loss": 0.8244733810424805,
"step": 125
},
{
"epoch": 1.641025641025641,
"eval_loss": 0.8378878235816956,
"eval_runtime": 3.1707,
"eval_samples_per_second": 33.431,
"eval_steps_per_second": 0.315,
"eval_token_acc": 0.7707161027132189,
"step": 128
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.2051128644478209,
"learning_rate": 1.339745962155613e-06,
"loss": 0.8531595230102539,
"step": 130
},
{
"epoch": 1.7307692307692308,
"grad_norm": 0.2033449517761299,
"learning_rate": 8.810015400790994e-07,
"loss": 0.8210726737976074,
"step": 135
},
{
"epoch": 1.7948717948717947,
"grad_norm": 0.2108097228299701,
"learning_rate": 5.146355805285452e-07,
"loss": 0.8313694000244141,
"step": 140
},
{
"epoch": 1.8461538461538463,
"eval_loss": 0.8371573686599731,
"eval_runtime": 3.1872,
"eval_samples_per_second": 33.259,
"eval_steps_per_second": 0.314,
"eval_token_acc": 0.7708367611292432,
"step": 144
},
{
"epoch": 1.858974358974359,
"grad_norm": 0.20286457323683132,
"learning_rate": 2.4435949152906144e-07,
"loss": 0.827159309387207,
"step": 145
},
{
"epoch": 1.9230769230769231,
"grad_norm": 0.19816039815698575,
"learning_rate": 7.291125901946027e-08,
"loss": 0.8328804016113281,
"step": 150
},
{
"epoch": 1.9871794871794872,
"grad_norm": 0.20859708124459583,
"learning_rate": 2.0277101514987184e-09,
"loss": 0.824882698059082,
"step": 155
},
{
"epoch": 2.0,
"eval_loss": 0.8369789123535156,
"eval_runtime": 3.1635,
"eval_samples_per_second": 33.508,
"eval_steps_per_second": 0.316,
"eval_token_acc": 0.7708906086868077,
"step": 156
}
],
"logging_steps": 5,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3297382913540096.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}