{
  "best_global_step": 156,
  "best_metric": 0.83697891,
  "best_model_checkpoint": "/proj/checkpoints/zhangchen/tool-rl-dev/sft_models/Qwen2.5-14B-Instruct-MIX-KimiK2-DD3-LR2.0e-5-EPOCHS2/v0-20250919-070250/checkpoint-156",
  "epoch": 2.0,
  "eval_steps": 16,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01282051282051282,
      "grad_norm": 9.204224584296727,
      "learning_rate": 1.9997972289848505e-05,
      "loss": 1.4553444385528564,
      "step": 1
    },
    {
      "epoch": 0.0641025641025641,
      "grad_norm": 1.4530528528883362,
      "learning_rate": 1.9949348350626456e-05,
      "loss": 1.1599386930465698,
      "step": 5
    },
    {
      "epoch": 0.1282051282051282,
      "grad_norm": 0.5423824300175941,
      "learning_rate": 1.979790652042268e-05,
      "loss": 1.049617385864258,
      "step": 10
    },
    {
      "epoch": 0.19230769230769232,
      "grad_norm": 0.4547116616967532,
      "learning_rate": 1.954720866508546e-05,
      "loss": 1.0327177047729492,
      "step": 15
    },
    {
      "epoch": 0.20512820512820512,
      "eval_loss": 0.9275281429290771,
      "eval_runtime": 3.1525,
      "eval_samples_per_second": 33.624,
      "eval_steps_per_second": 0.317,
      "eval_token_acc": 0.7512940864412847,
      "step": 16
    },
    {
      "epoch": 0.2564102564102564,
      "grad_norm": 0.39026042319054643,
      "learning_rate": 1.9199794436588244e-05,
      "loss": 1.0138656616210937,
      "step": 20
    },
    {
      "epoch": 0.32051282051282054,
      "grad_norm": 0.31359214563669735,
      "learning_rate": 1.875918325566888e-05,
      "loss": 0.9882354736328125,
      "step": 25
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 0.3448089787765916,
      "learning_rate": 1.8229838658936566e-05,
      "loss": 0.9615086555480957,
      "step": 30
    },
    {
      "epoch": 0.41025641025641024,
      "eval_loss": 0.885535478591919,
      "eval_runtime": 3.1496,
      "eval_samples_per_second": 33.655,
      "eval_steps_per_second": 0.318,
      "eval_token_acc": 0.760162978607562,
      "step": 32
    },
    {
      "epoch": 0.44871794871794873,
      "grad_norm": 0.2761120555741548,
      "learning_rate": 1.761712308177359e-05,
      "loss": 0.9588653564453125,
      "step": 35
    },
    {
      "epoch": 0.5128205128205128,
      "grad_norm": 0.2440053485566061,
      "learning_rate": 1.6927243535095995e-05,
      "loss": 0.9517783164978028,
      "step": 40
    },
    {
      "epoch": 0.5769230769230769,
      "grad_norm": 0.2512274854383336,
      "learning_rate": 1.6167188726285433e-05,
      "loss": 0.9253202438354492,
      "step": 45
    },
    {
      "epoch": 0.6153846153846154,
      "eval_loss": 0.8652209043502808,
      "eval_runtime": 3.1393,
      "eval_samples_per_second": 33.766,
      "eval_steps_per_second": 0.319,
      "eval_token_acc": 0.764574489619886,
      "step": 48
    },
    {
      "epoch": 0.6410256410256411,
      "grad_norm": 0.24767273865856176,
      "learning_rate": 1.5344658261278013e-05,
      "loss": 0.923402214050293,
      "step": 50
    },
    {
      "epoch": 0.7051282051282052,
      "grad_norm": 0.22920107782733407,
      "learning_rate": 1.4467984645016259e-05,
      "loss": 0.9294641494750977,
      "step": 55
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.2734790600557061,
      "learning_rate": 1.3546048870425356e-05,
      "loss": 0.9429225921630859,
      "step": 60
    },
    {
      "epoch": 0.8205128205128205,
      "eval_loss": 0.8536690473556519,
      "eval_runtime": 3.2912,
      "eval_samples_per_second": 32.208,
      "eval_steps_per_second": 0.304,
      "eval_token_acc": 0.7668949204801208,
      "step": 64
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.2458526724445435,
      "learning_rate": 1.2588190451025209e-05,
      "loss": 0.9190584182739258,
      "step": 65
    },
    {
      "epoch": 0.8974358974358975,
      "grad_norm": 0.24846448980401165,
      "learning_rate": 1.1604112808577603e-05,
      "loss": 0.918177604675293,
      "step": 70
    },
    {
      "epoch": 0.9615384615384616,
      "grad_norm": 0.22516966308872668,
      "learning_rate": 1.0603784974222862e-05,
      "loss": 0.896082878112793,
      "step": 75
    },
    {
      "epoch": 1.0256410256410255,
      "grad_norm": 0.28109415933672915,
      "learning_rate": 9.597340598905851e-06,
      "loss": 0.8887803077697753,
      "step": 80
    },
    {
      "epoch": 1.0256410256410255,
      "eval_loss": 0.846965491771698,
      "eval_runtime": 3.1974,
      "eval_samples_per_second": 33.152,
      "eval_steps_per_second": 0.313,
      "eval_token_acc": 0.7683886916140407,
      "step": 80
    },
    {
      "epoch": 1.0897435897435896,
      "grad_norm": 0.28540387671693573,
      "learning_rate": 8.594975296149076e-06,
      "loss": 0.8414465904235839,
      "step": 85
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.2803963137369875,
      "learning_rate": 7.606843357124426e-06,
      "loss": 0.8364896774291992,
      "step": 90
    },
    {
      "epoch": 1.217948717948718,
      "grad_norm": 0.2416844219127749,
      "learning_rate": 6.6429548843339554e-06,
      "loss": 0.8242883682250977,
      "step": 95
    },
    {
      "epoch": 1.2307692307692308,
      "eval_loss": 0.8430743217468262,
      "eval_runtime": 3.2098,
      "eval_samples_per_second": 33.024,
      "eval_steps_per_second": 0.312,
      "eval_token_acc": 0.7694875806591539,
      "step": 96
    },
    {
      "epoch": 1.282051282051282,
      "grad_norm": 0.2515386758810601,
      "learning_rate": 5.713074385969457e-06,
      "loss": 0.835479736328125,
      "step": 100
    },
    {
      "epoch": 1.3461538461538463,
      "grad_norm": 0.23492463105327158,
      "learning_rate": 4.826621858223431e-06,
      "loss": 0.8471467018127441,
      "step": 105
    },
    {
      "epoch": 1.4102564102564101,
      "grad_norm": 0.2990830422111698,
      "learning_rate": 3.99257735762021e-06,
      "loss": 0.8314805030822754,
      "step": 110
    },
    {
      "epoch": 1.435897435897436,
      "eval_loss": 0.8396947979927063,
      "eval_runtime": 3.1689,
      "eval_samples_per_second": 33.451,
      "eval_steps_per_second": 0.316,
      "eval_token_acc": 0.7704079750227107,
      "step": 112
    },
    {
      "epoch": 1.4743589743589745,
      "grad_norm": 0.21721857976027215,
      "learning_rate": 3.2193900300810908e-06,
      "loss": 0.8310983657836915,
      "step": 115
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.21954724710062298,
      "learning_rate": 2.514892518288988e-06,
      "loss": 0.8174694061279297,
      "step": 120
    },
    {
      "epoch": 1.6025641025641026,
      "grad_norm": 0.2130942259472865,
      "learning_rate": 1.8862216144342692e-06,
      "loss": 0.8244733810424805,
      "step": 125
    },
    {
      "epoch": 1.641025641025641,
      "eval_loss": 0.8378878235816956,
      "eval_runtime": 3.1707,
      "eval_samples_per_second": 33.431,
      "eval_steps_per_second": 0.315,
      "eval_token_acc": 0.7707161027132189,
      "step": 128
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.2051128644478209,
      "learning_rate": 1.339745962155613e-06,
      "loss": 0.8531595230102539,
      "step": 130
    },
    {
      "epoch": 1.7307692307692308,
      "grad_norm": 0.2033449517761299,
      "learning_rate": 8.810015400790994e-07,
      "loss": 0.8210726737976074,
      "step": 135
    },
    {
      "epoch": 1.7948717948717947,
      "grad_norm": 0.2108097228299701,
      "learning_rate": 5.146355805285452e-07,
      "loss": 0.8313694000244141,
      "step": 140
    },
    {
      "epoch": 1.8461538461538463,
      "eval_loss": 0.8371573686599731,
      "eval_runtime": 3.1872,
      "eval_samples_per_second": 33.259,
      "eval_steps_per_second": 0.314,
      "eval_token_acc": 0.7708367611292432,
      "step": 144
    },
    {
      "epoch": 1.858974358974359,
      "grad_norm": 0.20286457323683132,
      "learning_rate": 2.4435949152906144e-07,
      "loss": 0.827159309387207,
      "step": 145
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 0.19816039815698575,
      "learning_rate": 7.291125901946027e-08,
      "loss": 0.8328804016113281,
      "step": 150
    },
    {
      "epoch": 1.9871794871794872,
      "grad_norm": 0.20859708124459583,
      "learning_rate": 2.0277101514987184e-09,
      "loss": 0.824882698059082,
      "step": 155
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8369789123535156,
      "eval_runtime": 3.1635,
      "eval_samples_per_second": 33.508,
      "eval_steps_per_second": 0.316,
      "eval_token_acc": 0.7708906086868077,
      "step": 156
    }
  ],
  "logging_steps": 5,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3297382913540096.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}