{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999483204134367,
  "eval_steps": 500,
  "global_step": 967,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010335917312661498,
      "grad_norm": 0.9090129099183513,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.6875,
      "step": 1
    },
    {
      "epoch": 0.0020671834625322996,
      "grad_norm": 0.27089380185556494,
      "learning_rate": 6.666666666666667e-07,
      "loss": 1.0891,
      "step": 2
    },
    {
      "epoch": 0.0031007751937984496,
      "grad_norm": 0.4880556108381944,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.1986,
      "step": 3
    },
    {
      "epoch": 0.004134366925064599,
      "grad_norm": 0.49459341043390753,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 0.5008,
      "step": 4
    },
    {
      "epoch": 0.00516795865633075,
      "grad_norm": 0.3101959359524343,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.6038,
      "step": 5
    },
    {
      "epoch": 0.006201550387596899,
      "grad_norm": 0.39822915692223254,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.659,
      "step": 6
    },
    {
      "epoch": 0.007235142118863049,
      "grad_norm": 0.35071635919169647,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 0.8112,
      "step": 7
    },
    {
      "epoch": 0.008268733850129198,
      "grad_norm": 0.5468564420145585,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.9821,
      "step": 8
    },
    {
      "epoch": 0.009302325581395349,
      "grad_norm": 0.33137610736236356,
      "learning_rate": 3e-06,
      "loss": 0.5198,
      "step": 9
    },
    {
      "epoch": 0.0103359173126615,
      "grad_norm": 0.739754509340165,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.4199,
      "step": 10
    },
    {
      "epoch": 0.011369509043927648,
      "grad_norm": 0.45230246448658645,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 1.1548,
      "step": 11
    },
    {
      "epoch": 0.012403100775193798,
      "grad_norm": 0.4153283956662712,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.5016,
      "step": 12
    },
    {
      "epoch": 0.013436692506459949,
      "grad_norm": 1.0210646319118475,
      "learning_rate": 4.333333333333334e-06,
      "loss": 0.3881,
      "step": 13
    },
    {
      "epoch": 0.014470284237726097,
      "grad_norm": 0.28982750476437225,
      "learning_rate": 4.666666666666667e-06,
      "loss": 0.4189,
      "step": 14
    },
    {
      "epoch": 0.015503875968992248,
      "grad_norm": 0.21201106803526448,
      "learning_rate": 5e-06,
      "loss": 0.5399,
      "step": 15
    },
    {
      "epoch": 0.016537467700258397,
      "grad_norm": 0.6196157316556428,
      "learning_rate": 5.333333333333334e-06,
      "loss": 0.687,
      "step": 16
    },
    {
      "epoch": 0.01757105943152455,
      "grad_norm": 0.1722850066748571,
      "learning_rate": 5.666666666666667e-06,
      "loss": 0.6763,
      "step": 17
    },
    {
      "epoch": 0.018604651162790697,
      "grad_norm": 0.378201753720806,
      "learning_rate": 6e-06,
      "loss": 0.3821,
      "step": 18
    },
    {
      "epoch": 0.019638242894056846,
      "grad_norm": 2.2221140656964544,
      "learning_rate": 6.333333333333333e-06,
      "loss": 0.8239,
      "step": 19
    },
    {
      "epoch": 0.020671834625323,
      "grad_norm": 0.36272370405550464,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.9882,
      "step": 20
    },
    {
      "epoch": 0.021705426356589147,
      "grad_norm": 0.6653293165747783,
      "learning_rate": 7e-06,
      "loss": 0.3894,
      "step": 21
    },
    {
      "epoch": 0.022739018087855296,
      "grad_norm": 0.2067102006204865,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.6442,
      "step": 22
    },
    {
      "epoch": 0.023772609819121448,
      "grad_norm": 0.1980580854899772,
      "learning_rate": 7.666666666666667e-06,
      "loss": 0.3749,
      "step": 23
    },
    {
      "epoch": 0.024806201550387597,
      "grad_norm": 0.11447400792428801,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.5871,
      "step": 24
    },
    {
      "epoch": 0.025839793281653745,
      "grad_norm": 1.1936762003716521,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.7059,
      "step": 25
    },
    {
      "epoch": 0.026873385012919897,
      "grad_norm": 0.30631457990408056,
      "learning_rate": 8.666666666666668e-06,
      "loss": 0.7587,
      "step": 26
    },
    {
      "epoch": 0.027906976744186046,
      "grad_norm": 0.5657932799778732,
      "learning_rate": 9e-06,
      "loss": 1.0282,
      "step": 27
    },
    {
      "epoch": 0.028940568475452195,
      "grad_norm": 0.0939770357115336,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.5661,
      "step": 28
    },
    {
      "epoch": 0.029974160206718347,
      "grad_norm": 0.2775860154781814,
      "learning_rate": 9.666666666666667e-06,
      "loss": 0.7384,
      "step": 29
    },
    {
      "epoch": 0.031007751937984496,
      "grad_norm": 0.2621715420990597,
      "learning_rate": 1e-05,
      "loss": 0.9776,
      "step": 30
    },
    {
      "epoch": 0.03204134366925065,
      "grad_norm": 0.09558511562355317,
      "learning_rate": 9.999971896515836e-06,
      "loss": 0.4562,
      "step": 31
    },
    {
      "epoch": 0.03307493540051679,
      "grad_norm": 0.0404872943502216,
      "learning_rate": 9.999887586379264e-06,
      "loss": 0.7611,
      "step": 32
    },
    {
      "epoch": 0.034108527131782945,
      "grad_norm": 0.061683970035303064,
      "learning_rate": 9.99974707053805e-06,
      "loss": 0.6918,
      "step": 33
    },
    {
      "epoch": 0.0351421188630491,
      "grad_norm": 0.1914561387320027,
      "learning_rate": 9.999550350571785e-06,
      "loss": 0.6041,
      "step": 34
    },
    {
      "epoch": 0.03617571059431524,
      "grad_norm": 0.12051535851563193,
      "learning_rate": 9.999297428691878e-06,
      "loss": 0.6163,
      "step": 35
    },
    {
      "epoch": 0.037209302325581395,
      "grad_norm": 0.20677190366404655,
      "learning_rate": 9.998988307741521e-06,
      "loss": 0.6326,
      "step": 36
    },
    {
      "epoch": 0.03824289405684755,
      "grad_norm": 0.11483760360738193,
      "learning_rate": 9.998622991195668e-06,
      "loss": 0.6454,
      "step": 37
    },
    {
      "epoch": 0.03927648578811369,
      "grad_norm": 0.0598535375220559,
      "learning_rate": 9.998201483160981e-06,
      "loss": 0.8693,
      "step": 38
    },
    {
      "epoch": 0.040310077519379844,
      "grad_norm": 0.5419835653443421,
      "learning_rate": 9.997723788375803e-06,
      "loss": 0.7857,
      "step": 39
    },
    {
      "epoch": 0.041343669250646,
      "grad_norm": 1.3807759715986148,
      "learning_rate": 9.997189912210086e-06,
      "loss": 0.6809,
      "step": 40
    },
    {
      "epoch": 0.04237726098191214,
      "grad_norm": 0.2976108311548857,
      "learning_rate": 9.996599860665342e-06,
      "loss": 1.149,
      "step": 41
    },
    {
      "epoch": 0.043410852713178294,
      "grad_norm": 0.0722625783106846,
      "learning_rate": 9.995953640374574e-06,
      "loss": 0.7663,
      "step": 42
    },
    {
      "epoch": 0.044444444444444446,
      "grad_norm": 0.391708278691,
      "learning_rate": 9.9952512586022e-06,
      "loss": 0.7576,
      "step": 43
    },
    {
      "epoch": 0.04547803617571059,
      "grad_norm": 0.27357048651375615,
      "learning_rate": 9.994492723243965e-06,
      "loss": 1.2711,
      "step": 44
    },
    {
      "epoch": 0.046511627906976744,
      "grad_norm": 0.25921754899158844,
      "learning_rate": 9.993678042826869e-06,
      "loss": 0.5943,
      "step": 45
    },
    {
      "epoch": 0.047545219638242896,
      "grad_norm": 0.6206605426118155,
      "learning_rate": 9.99280722650905e-06,
      "loss": 0.6986,
      "step": 46
    },
    {
      "epoch": 0.04857881136950904,
      "grad_norm": 0.25707207671871285,
      "learning_rate": 9.991880284079704e-06,
      "loss": 0.3929,
      "step": 47
    },
    {
      "epoch": 0.04961240310077519,
      "grad_norm": 0.23354339500277513,
      "learning_rate": 9.99089722595895e-06,
      "loss": 0.7684,
      "step": 48
    },
    {
      "epoch": 0.050645994832041345,
      "grad_norm": 1.3418165813576552,
      "learning_rate": 9.989858063197735e-06,
      "loss": 0.4631,
      "step": 49
    },
    {
      "epoch": 0.05167958656330749,
      "grad_norm": 0.04272899199707252,
      "learning_rate": 9.988762807477694e-06,
      "loss": 0.8219,
      "step": 50
    },
    {
      "epoch": 0.05271317829457364,
      "grad_norm": 0.3711392471056786,
      "learning_rate": 9.987611471111027e-06,
      "loss": 0.3626,
      "step": 51
    },
    {
      "epoch": 0.053746770025839795,
      "grad_norm": 0.053524269158534844,
      "learning_rate": 9.986404067040363e-06,
      "loss": 0.7619,
      "step": 52
    },
    {
      "epoch": 0.05478036175710594,
      "grad_norm": 0.08404269087871034,
      "learning_rate": 9.985140608838604e-06,
      "loss": 0.8367,
      "step": 53
    },
    {
      "epoch": 0.05581395348837209,
      "grad_norm": 0.012871408808671965,
      "learning_rate": 9.98382111070878e-06,
      "loss": 0.8153,
      "step": 54
    },
    {
      "epoch": 0.056847545219638244,
      "grad_norm": 0.06418700758743964,
      "learning_rate": 9.982445587483893e-06,
      "loss": 0.5435,
      "step": 55
    },
    {
      "epoch": 0.05788113695090439,
      "grad_norm": 0.03976783627106558,
      "learning_rate": 9.981014054626737e-06,
      "loss": 0.8318,
      "step": 56
    },
    {
      "epoch": 0.05891472868217054,
      "grad_norm": 0.01779597201722654,
      "learning_rate": 9.979526528229737e-06,
      "loss": 0.5693,
      "step": 57
    },
    {
      "epoch": 0.059948320413436694,
      "grad_norm": 0.005490978049849116,
      "learning_rate": 9.977983025014765e-06,
      "loss": 0.9856,
      "step": 58
    },
    {
      "epoch": 0.06098191214470284,
      "grad_norm": 0.00869732871456688,
      "learning_rate": 9.976383562332946e-06,
      "loss": 0.7282,
      "step": 59
    },
    {
      "epoch": 0.06201550387596899,
      "grad_norm": 0.005884279478506975,
      "learning_rate": 9.974728158164471e-06,
      "loss": 0.5328,
      "step": 60
    },
    {
      "epoch": 0.06304909560723514,
      "grad_norm": 0.0031824656456418124,
      "learning_rate": 9.973016831118389e-06,
      "loss": 0.3528,
      "step": 61
    },
    {
      "epoch": 0.0640826873385013,
      "grad_norm": 0.0031950916905932433,
      "learning_rate": 9.971249600432403e-06,
      "loss": 0.9079,
      "step": 62
    },
    {
      "epoch": 0.06511627906976744,
      "grad_norm": 1.865416770240149,
      "learning_rate": 9.969426485972645e-06,
      "loss": 0.7145,
      "step": 63
    },
    {
      "epoch": 0.06614987080103359,
      "grad_norm": 0.02537300672861243,
      "learning_rate": 9.967547508233466e-06,
      "loss": 0.9373,
      "step": 64
    },
    {
      "epoch": 0.06718346253229975,
      "grad_norm": 0.004809868045747779,
      "learning_rate": 9.965612688337194e-06,
      "loss": 0.5011,
      "step": 65
    },
    {
      "epoch": 0.06821705426356589,
      "grad_norm": 0.016863400818420367,
      "learning_rate": 9.9636220480339e-06,
      "loss": 0.4103,
      "step": 66
    },
    {
      "epoch": 0.06925064599483204,
      "grad_norm": 0.005079722110637466,
      "learning_rate": 9.961575609701154e-06,
      "loss": 0.7274,
      "step": 67
    },
    {
      "epoch": 0.0702842377260982,
      "grad_norm": 0.027816824236486157,
      "learning_rate": 9.959473396343777e-06,
      "loss": 0.4467,
      "step": 68
    },
    {
      "epoch": 0.07131782945736434,
      "grad_norm": 0.0077521416953004385,
      "learning_rate": 9.957315431593578e-06,
      "loss": 0.691,
      "step": 69
    },
    {
      "epoch": 0.07235142118863049,
      "grad_norm": 0.011619270394371323,
      "learning_rate": 9.955101739709085e-06,
      "loss": 0.4226,
      "step": 70
    },
    {
      "epoch": 0.07338501291989664,
      "grad_norm": 0.019350981400024333,
      "learning_rate": 9.952832345575283e-06,
      "loss": 0.8486,
      "step": 71
    },
    {
      "epoch": 0.07441860465116279,
      "grad_norm": 0.10032453006309915,
      "learning_rate": 9.950507274703323e-06,
      "loss": 0.6854,
      "step": 72
    },
    {
      "epoch": 0.07545219638242893,
      "grad_norm": 0.0010615334637522776,
      "learning_rate": 9.948126553230242e-06,
      "loss": 0.6599,
      "step": 73
    },
    {
      "epoch": 0.0764857881136951,
      "grad_norm": 0.014101294410095597,
      "learning_rate": 9.945690207918667e-06,
      "loss": 0.8761,
      "step": 74
    },
    {
      "epoch": 0.07751937984496124,
      "grad_norm": 0.06623673555925402,
      "learning_rate": 9.943198266156517e-06,
      "loss": 0.877,
      "step": 75
    },
    {
      "epoch": 0.07855297157622738,
      "grad_norm": 0.008792270759387424,
      "learning_rate": 9.940650755956686e-06,
      "loss": 0.6514,
      "step": 76
    },
    {
      "epoch": 0.07958656330749354,
      "grad_norm": 0.1532706962823583,
      "learning_rate": 9.938047705956746e-06,
      "loss": 1.0751,
      "step": 77
    },
    {
      "epoch": 0.08062015503875969,
      "grad_norm": 0.005303260675332463,
      "learning_rate": 9.935389145418599e-06,
      "loss": 0.936,
      "step": 78
    },
    {
      "epoch": 0.08165374677002583,
      "grad_norm": 0.020125884810280253,
      "learning_rate": 9.932675104228177e-06,
      "loss": 1.077,
      "step": 79
    },
    {
      "epoch": 0.082687338501292,
      "grad_norm": 0.005122113376444265,
      "learning_rate": 9.929905612895082e-06,
      "loss": 0.7111,
      "step": 80
    },
    {
      "epoch": 0.08372093023255814,
      "grad_norm": 0.004439071455302085,
      "learning_rate": 9.927080702552256e-06,
      "loss": 0.3732,
      "step": 81
    },
    {
      "epoch": 0.08475452196382428,
      "grad_norm": 0.07002936438988991,
      "learning_rate": 9.924200404955628e-06,
      "loss": 0.6678,
      "step": 82
    },
    {
      "epoch": 0.08578811369509044,
      "grad_norm": 0.09328636170275496,
      "learning_rate": 9.921264752483761e-06,
      "loss": 0.4586,
      "step": 83
    },
    {
      "epoch": 0.08682170542635659,
      "grad_norm": 0.03127067301397662,
      "learning_rate": 9.918273778137477e-06,
      "loss": 0.6552,
      "step": 84
    },
    {
      "epoch": 0.08785529715762273,
      "grad_norm": 0.019424366390746833,
      "learning_rate": 9.915227515539497e-06,
      "loss": 0.9721,
      "step": 85
    },
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 0.045440933922917544,
      "learning_rate": 9.912125998934055e-06,
      "loss": 0.6345,
      "step": 86
    },
    {
      "epoch": 0.08992248062015504,
      "grad_norm": 0.03728246193724178,
      "learning_rate": 9.908969263186525e-06,
      "loss": 0.725,
      "step": 87
    },
    {
      "epoch": 0.09095607235142118,
      "grad_norm": 0.05926229596506526,
      "learning_rate": 9.905757343783014e-06,
      "loss": 0.5781,
      "step": 88
    },
    {
      "epoch": 0.09198966408268734,
      "grad_norm": 0.007588286024152401,
      "learning_rate": 9.90249027682997e-06,
      "loss": 1.0342,
      "step": 89
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 0.1626851045510197,
      "learning_rate": 9.899168099053784e-06,
      "loss": 0.6429,
      "step": 90
    },
    {
      "epoch": 0.09405684754521963,
      "grad_norm": 0.030599879387829165,
      "learning_rate": 9.895790847800361e-06,
      "loss": 0.4736,
      "step": 91
    },
    {
      "epoch": 0.09509043927648579,
      "grad_norm": 0.08585241020403361,
      "learning_rate": 9.892358561034713e-06,
      "loss": 1.148,
      "step": 92
    },
    {
      "epoch": 0.09612403100775194,
      "grad_norm": 0.0125460558309209,
      "learning_rate": 9.888871277340522e-06,
      "loss": 0.9593,
      "step": 93
    },
    {
      "epoch": 0.09715762273901808,
      "grad_norm": 0.014596362763371356,
      "learning_rate": 9.885329035919724e-06,
      "loss": 0.621,
      "step": 94
    },
    {
      "epoch": 0.09819121447028424,
      "grad_norm": 0.024152611883093273,
      "learning_rate": 9.881731876592046e-06,
      "loss": 0.3696,
      "step": 95
    },
    {
      "epoch": 0.09922480620155039,
      "grad_norm": 0.024871942451281805,
      "learning_rate": 9.878079839794572e-06,
      "loss": 0.8185,
      "step": 96
    },
    {
      "epoch": 0.10025839793281653,
      "grad_norm": 0.01161714173899997,
      "learning_rate": 9.874372966581285e-06,
      "loss": 0.5902,
      "step": 97
    },
    {
      "epoch": 0.10129198966408269,
      "grad_norm": 0.040274456976341154,
      "learning_rate": 9.870611298622606e-06,
      "loss": 0.7325,
      "step": 98
    },
    {
      "epoch": 0.10232558139534884,
      "grad_norm": 0.02784540465261352,
      "learning_rate": 9.866794878204926e-06,
      "loss": 0.738,
      "step": 99
    },
    {
      "epoch": 0.10335917312661498,
      "grad_norm": 0.01880375856390902,
      "learning_rate": 9.862923748230128e-06,
      "loss": 0.286,
      "step": 100
    },
    {
      "epoch": 0.10439276485788114,
      "grad_norm": 0.02590038239721099,
      "learning_rate": 9.858997952215112e-06,
      "loss": 0.6919,
      "step": 101
    },
    {
      "epoch": 0.10542635658914729,
      "grad_norm": 0.10068895707568987,
      "learning_rate": 9.855017534291293e-06,
      "loss": 1.0573,
      "step": 102
    },
    {
      "epoch": 0.10645994832041343,
      "grad_norm": 0.009141589249859753,
      "learning_rate": 9.850982539204115e-06,
      "loss": 0.7481,
      "step": 103
    },
    {
      "epoch": 0.10749354005167959,
      "grad_norm": 0.1740655019851586,
      "learning_rate": 9.846893012312549e-06,
      "loss": 0.4899,
      "step": 104
    },
    {
      "epoch": 0.10852713178294573,
      "grad_norm": 0.17144774201960322,
      "learning_rate": 9.842748999588575e-06,
      "loss": 0.6173,
      "step": 105
    },
    {
      "epoch": 0.10956072351421188,
      "grad_norm": 0.13835126963971783,
      "learning_rate": 9.838550547616671e-06,
      "loss": 0.7531,
      "step": 106
    },
    {
      "epoch": 0.11059431524547804,
      "grad_norm": 0.0923036982284792,
      "learning_rate": 9.83429770359329e-06,
      "loss": 0.5774,
      "step": 107
    },
    {
      "epoch": 0.11162790697674418,
      "grad_norm": 0.05366991422025271,
      "learning_rate": 9.829990515326324e-06,
      "loss": 0.6863,
      "step": 108
    },
    {
      "epoch": 0.11266149870801033,
      "grad_norm": 0.06790337422134408,
      "learning_rate": 9.825629031234574e-06,
      "loss": 0.754,
      "step": 109
    },
    {
      "epoch": 0.11369509043927649,
      "grad_norm": 0.0043102575996633976,
      "learning_rate": 9.821213300347198e-06,
      "loss": 0.7151,
      "step": 110
    },
    {
      "epoch": 0.11472868217054263,
      "grad_norm": 0.005297749745927153,
      "learning_rate": 9.816743372303166e-06,
      "loss": 0.9024,
      "step": 111
    },
    {
      "epoch": 0.11576227390180878,
      "grad_norm": 0.060182593772243845,
      "learning_rate": 9.812219297350699e-06,
      "loss": 0.6736,
      "step": 112
    },
    {
      "epoch": 0.11679586563307494,
      "grad_norm": 0.09251030740902857,
      "learning_rate": 9.807641126346704e-06,
      "loss": 0.7269,
      "step": 113
    },
    {
      "epoch": 0.11782945736434108,
      "grad_norm": 0.17681770283200476,
      "learning_rate": 9.803008910756203e-06,
      "loss": 0.4566,
      "step": 114
    },
    {
      "epoch": 0.11886304909560723,
      "grad_norm": 0.011119102716883622,
      "learning_rate": 9.798322702651754e-06,
      "loss": 0.9941,
      "step": 115
    },
    {
      "epoch": 0.11989664082687339,
      "grad_norm": 0.05416320544750928,
      "learning_rate": 9.793582554712873e-06,
      "loss": 0.9525,
      "step": 116
    },
    {
      "epoch": 0.12093023255813953,
      "grad_norm": 0.02089587039628738,
      "learning_rate": 9.788788520225421e-06,
      "loss": 1.074,
      "step": 117
    },
    {
      "epoch": 0.12196382428940568,
      "grad_norm": 0.020361919239179075,
      "learning_rate": 9.783940653081033e-06,
      "loss": 0.4797,
      "step": 118
    },
    {
      "epoch": 0.12299741602067184,
      "grad_norm": 0.12938152773395833,
      "learning_rate": 9.779039007776487e-06,
      "loss": 0.4685,
      "step": 119
    },
    {
      "epoch": 0.12403100775193798,
      "grad_norm": 0.04646346070310331,
      "learning_rate": 9.774083639413112e-06,
      "loss": 0.6477,
      "step": 120
    },
    {
      "epoch": 0.12506459948320414,
      "grad_norm": 0.027968188382121582,
      "learning_rate": 9.769074603696153e-06,
      "loss": 0.4681,
      "step": 121
    },
    {
      "epoch": 0.12609819121447027,
      "grad_norm": 0.038768940256862866,
      "learning_rate": 9.764011956934153e-06,
      "loss": 0.8055,
      "step": 122
    },
    {
      "epoch": 0.12713178294573643,
      "grad_norm": 0.1330094630358685,
      "learning_rate": 9.758895756038314e-06,
      "loss": 0.9351,
      "step": 123
    },
    {
      "epoch": 0.1281653746770026,
      "grad_norm": 0.01667862599736821,
      "learning_rate": 9.753726058521868e-06,
      "loss": 0.7428,
      "step": 124
    },
    {
      "epoch": 0.12919896640826872,
      "grad_norm": 0.7369569053525673,
      "learning_rate": 9.748502922499418e-06,
      "loss": 1.0534,
      "step": 125
    },
    {
      "epoch": 0.13023255813953488,
      "grad_norm": 0.1547652254510622,
      "learning_rate": 9.743226406686293e-06,
      "loss": 0.7646,
      "step": 126
    },
    {
      "epoch": 0.13126614987080104,
      "grad_norm": 0.04226382963651684,
      "learning_rate": 9.737896570397885e-06,
      "loss": 1.0278,
      "step": 127
    },
    {
      "epoch": 0.13229974160206717,
      "grad_norm": 0.07043083996098821,
      "learning_rate": 9.73251347354898e-06,
      "loss": 0.4035,
      "step": 128
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 0.02087931401850053,
      "learning_rate": 9.72707717665309e-06,
      "loss": 0.7019,
      "step": 129
    },
    {
      "epoch": 0.1343669250645995,
      "grad_norm": 0.07144193933489354,
      "learning_rate": 9.721587740821768e-06,
      "loss": 0.7888,
      "step": 130
    },
    {
      "epoch": 0.13540051679586562,
      "grad_norm": 0.013012340847501006,
      "learning_rate": 9.716045227763923e-06,
      "loss": 0.9284,
      "step": 131
    },
    {
      "epoch": 0.13643410852713178,
      "grad_norm": 0.011938480381492894,
      "learning_rate": 9.71044969978513e-06,
      "loss": 0.4473,
      "step": 132
    },
    {
      "epoch": 0.13746770025839794,
      "grad_norm": 0.0030055926516396156,
      "learning_rate": 9.704801219786915e-06,
      "loss": 0.8144,
      "step": 133
    },
    {
      "epoch": 0.13850129198966407,
      "grad_norm": 0.04400985548022776,
      "learning_rate": 9.699099851266071e-06,
      "loss": 0.5614,
      "step": 134
    },
    {
      "epoch": 0.13953488372093023,
      "grad_norm": 0.01989204628821268,
      "learning_rate": 9.693345658313923e-06,
      "loss": 0.4959,
      "step": 135
    },
    {
      "epoch": 0.1405684754521964,
      "grad_norm": 0.018364895414391253,
      "learning_rate": 9.68753870561562e-06,
      "loss": 0.6348,
      "step": 136
    },
    {
      "epoch": 0.14160206718346252,
      "grad_norm": 0.015772091992547897,
      "learning_rate": 9.681679058449402e-06,
      "loss": 0.6295,
      "step": 137
    },
    {
      "epoch": 0.14263565891472868,
      "grad_norm": 0.007794376067122658,
      "learning_rate": 9.675766782685874e-06,
      "loss": 0.4606,
      "step": 138
    },
    {
      "epoch": 0.14366925064599484,
      "grad_norm": 0.00968499472651728,
      "learning_rate": 9.669801944787249e-06,
      "loss": 0.6022,
      "step": 139
    },
    {
      "epoch": 0.14470284237726097,
      "grad_norm": 0.011505124213977537,
      "learning_rate": 9.663784611806624e-06,
      "loss": 0.4991,
      "step": 140
    },
    {
      "epoch": 0.14573643410852713,
      "grad_norm": 0.0024427309317885198,
      "learning_rate": 9.657714851387204e-06,
      "loss": 0.8391,
      "step": 141
    },
    {
      "epoch": 0.1467700258397933,
      "grad_norm": 0.0012722550470359636,
      "learning_rate": 9.651592731761554e-06,
      "loss": 0.6516,
      "step": 142
    },
    {
      "epoch": 0.14780361757105942,
      "grad_norm": 0.00835755973542806,
      "learning_rate": 9.645418321750835e-06,
      "loss": 0.7895,
      "step": 143
    },
    {
      "epoch": 0.14883720930232558,
      "grad_norm": 0.0043016381507358,
      "learning_rate": 9.639191690764018e-06,
      "loss": 1.1614,
      "step": 144
    },
    {
      "epoch": 0.14987080103359174,
      "grad_norm": 0.11220816612448069,
      "learning_rate": 9.632912908797116e-06,
      "loss": 0.9535,
      "step": 145
    },
    {
      "epoch": 0.15090439276485787,
      "grad_norm": 0.010918008813352208,
      "learning_rate": 9.626582046432384e-06,
      "loss": 1.2754,
      "step": 146
    },
    {
      "epoch": 0.15193798449612403,
      "grad_norm": 0.01573844403229036,
      "learning_rate": 9.620199174837542e-06,
      "loss": 0.6639,
      "step": 147
    },
    {
      "epoch": 0.1529715762273902,
      "grad_norm": 0.004884339574378068,
      "learning_rate": 9.61376436576496e-06,
      "loss": 0.5425,
      "step": 148
    },
    {
      "epoch": 0.15400516795865632,
      "grad_norm": 0.017728402489478233,
      "learning_rate": 9.607277691550862e-06,
      "loss": 0.5286,
      "step": 149
    },
    {
      "epoch": 0.15503875968992248,
      "grad_norm": 0.004367598814308874,
      "learning_rate": 9.600739225114506e-06,
      "loss": 0.2666,
      "step": 150
    },
    {
      "epoch": 0.15607235142118864,
      "grad_norm": 0.001312271460424776,
      "learning_rate": 9.594149039957366e-06,
      "loss": 0.4213,
      "step": 151
    },
    {
      "epoch": 0.15710594315245477,
      "grad_norm": 0.00660812196722453,
      "learning_rate": 9.587507210162307e-06,
      "loss": 0.5213,
      "step": 152
    },
    {
      "epoch": 0.15813953488372093,
      "grad_norm": 0.0070750119816854916,
      "learning_rate": 9.580813810392755e-06,
      "loss": 0.7082,
      "step": 153
    },
    {
      "epoch": 0.1591731266149871,
      "grad_norm": 0.009928911894794251,
      "learning_rate": 9.57406891589185e-06,
      "loss": 0.3128,
      "step": 154
    },
    {
      "epoch": 0.16020671834625322,
      "grad_norm": 0.05309758029339527,
      "learning_rate": 9.567272602481607e-06,
      "loss": 0.7945,
      "step": 155
    },
    {
      "epoch": 0.16124031007751938,
      "grad_norm": 0.019306720919715675,
      "learning_rate": 9.56042494656206e-06,
      "loss": 0.8446,
      "step": 156
    },
    {
      "epoch": 0.16227390180878554,
      "grad_norm": 0.014443875109030238,
      "learning_rate": 9.553526025110404e-06,
      "loss": 0.5012,
      "step": 157
    },
    {
      "epoch": 0.16330749354005167,
      "grad_norm": 0.02953870280172381,
      "learning_rate": 9.546575915680134e-06,
      "loss": 0.7657,
      "step": 158
    },
    {
      "epoch": 0.16434108527131783,
      "grad_norm": 0.008112451210038032,
      "learning_rate": 9.539574696400165e-06,
      "loss": 0.8669,
      "step": 159
    },
    {
      "epoch": 0.165374677002584,
      "grad_norm": 0.020591092587212517,
      "learning_rate": 9.532522445973956e-06,
      "loss": 0.4167,
      "step": 160
    },
    {
      "epoch": 0.16640826873385012,
      "grad_norm": 0.0027326333963463346,
      "learning_rate": 9.525419243678633e-06,
      "loss": 0.4011,
      "step": 161
    },
    {
      "epoch": 0.16744186046511628,
      "grad_norm": 0.018139580691613727,
      "learning_rate": 9.51826516936409e-06,
      "loss": 0.7733,
      "step": 162
    },
    {
      "epoch": 0.16847545219638244,
      "grad_norm": 0.02037259981950972,
      "learning_rate": 9.51106030345209e-06,
      "loss": 0.3779,
      "step": 163
    },
    {
      "epoch": 0.16950904392764857,
      "grad_norm": 0.0011843009045357316,
      "learning_rate": 9.503804726935369e-06,
      "loss": 0.9776,
      "step": 164
    },
    {
      "epoch": 0.17054263565891473,
      "grad_norm": 0.020276193447650623,
      "learning_rate": 9.496498521376718e-06,
      "loss": 0.6554,
      "step": 165
    },
    {
      "epoch": 0.17157622739018089,
      "grad_norm": 0.007641721600942258,
      "learning_rate": 9.48914176890807e-06,
      "loss": 0.5794,
      "step": 166
    },
    {
      "epoch": 0.17260981912144702,
      "grad_norm": 0.042362757758444336,
      "learning_rate": 9.481734552229578e-06,
      "loss": 1.1943,
      "step": 167
    },
    {
      "epoch": 0.17364341085271318,
      "grad_norm": 0.021030172652131918,
      "learning_rate": 9.474276954608677e-06,
      "loss": 0.4606,
      "step": 168
    },
    {
      "epoch": 0.17467700258397933,
      "grad_norm": 0.025024435854970373,
      "learning_rate": 9.46676905987916e-06,
      "loss": 0.5215,
      "step": 169
    },
    {
      "epoch": 0.17571059431524547,
      "grad_norm": 0.020064884365210946,
      "learning_rate": 9.459210952440226e-06,
      "loss": 0.7753,
      "step": 170
    },
    {
      "epoch": 0.17674418604651163,
      "grad_norm": 0.013090072237976348,
      "learning_rate": 9.451602717255536e-06,
      "loss": 0.7002,
      "step": 171
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.02849087875145603,
      "learning_rate": 9.44394443985226e-06,
      "loss": 0.7805,
      "step": 172
    },
    {
      "epoch": 0.17881136950904392,
      "grad_norm": 0.03627885886391268,
      "learning_rate": 9.436236206320104e-06,
      "loss": 0.6136,
      "step": 173
    },
    {
      "epoch": 0.17984496124031008,
      "grad_norm": 0.019127247869784248,
      "learning_rate": 9.428478103310358e-06,
      "loss": 0.9643,
      "step": 174
    },
    {
      "epoch": 0.18087855297157623,
      "grad_norm": 0.0186863582425525,
      "learning_rate": 9.420670218034913e-06,
      "loss": 0.7769,
      "step": 175
    },
    {
      "epoch": 0.18191214470284237,
      "grad_norm": 0.1844420472276426,
      "learning_rate": 9.412812638265279e-06,
      "loss": 0.6734,
      "step": 176
    },
    {
      "epoch": 0.18294573643410852,
      "grad_norm": 0.019593103183060227,
      "learning_rate": 9.404905452331605e-06,
      "loss": 1.0527,
      "step": 177
    },
    {
      "epoch": 0.18397932816537468,
      "grad_norm": 0.02263645297699782,
      "learning_rate": 9.39694874912168e-06,
      "loss": 0.8546,
      "step": 178
    },
    {
      "epoch": 0.18501291989664082,
      "grad_norm": 0.01038196529954136,
      "learning_rate": 9.38894261807994e-06,
      "loss": 0.6681,
      "step": 179
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 0.027888657927517522,
      "learning_rate": 9.380887149206453e-06,
      "loss": 0.7028,
      "step": 180
    },
    {
      "epoch": 0.18708010335917313,
      "grad_norm": 0.04209020694232996,
      "learning_rate": 9.372782433055915e-06,
      "loss": 0.4251,
      "step": 181
    },
    {
      "epoch": 0.18811369509043926,
      "grad_norm": 0.0968371827798092,
      "learning_rate": 9.364628560736631e-06,
      "loss": 0.7704,
      "step": 182
    },
    {
      "epoch": 0.18914728682170542,
      "grad_norm": 0.008430300335674677,
      "learning_rate": 9.356425623909493e-06,
      "loss": 0.4266,
      "step": 183
    },
    {
      "epoch": 0.19018087855297158,
      "grad_norm": 0.13518755233346427,
      "learning_rate": 9.34817371478694e-06,
      "loss": 0.6482,
      "step": 184
    },
    {
      "epoch": 0.19121447028423771,
      "grad_norm": 0.013074876802381619,
      "learning_rate": 9.33987292613193e-06,
      "loss": 1.0393,
      "step": 185
    },
    {
      "epoch": 0.19224806201550387,
      "grad_norm": 0.017653765065432057,
      "learning_rate": 9.331523351256898e-06,
      "loss": 0.7594,
      "step": 186
    },
    {
      "epoch": 0.19328165374677003,
      "grad_norm": 0.009967127278013565,
      "learning_rate": 9.323125084022701e-06,
      "loss": 0.5133,
      "step": 187
    },
    {
      "epoch": 0.19431524547803616,
      "grad_norm": 0.0192882581142254,
      "learning_rate": 9.31467821883757e-06,
      "loss": 0.7868,
      "step": 188
    },
    {
      "epoch": 0.19534883720930232,
      "grad_norm": 0.022187760644189668,
      "learning_rate": 9.306182850656037e-06,
      "loss": 0.7766,
      "step": 189
    },
    {
      "epoch": 0.19638242894056848,
      "grad_norm": 0.014367546823217106,
      "learning_rate": 9.297639074977885e-06,
      "loss": 0.754,
      "step": 190
    },
    {
      "epoch": 0.1974160206718346,
      "grad_norm": 0.00585787674030827,
      "learning_rate": 9.289046987847058e-06,
      "loss": 0.4327,
      "step": 191
    },
    {
      "epoch": 0.19844961240310077,
      "grad_norm": 0.013910889847752175,
      "learning_rate": 9.280406685850587e-06,
      "loss": 0.448,
      "step": 192
    },
    {
      "epoch": 0.19948320413436693,
      "grad_norm": 0.017295733671717405,
      "learning_rate": 9.271718266117512e-06,
      "loss": 0.8502,
      "step": 193
    },
    {
      "epoch": 0.20051679586563306,
      "grad_norm": 0.007366175903201466,
      "learning_rate": 9.262981826317778e-06,
      "loss": 0.7272,
      "step": 194
    },
    {
      "epoch": 0.20155038759689922,
      "grad_norm": 0.04011045999149613,
      "learning_rate": 9.254197464661143e-06,
      "loss": 0.6261,
      "step": 195
    },
    {
      "epoch": 0.20258397932816538,
      "grad_norm": 0.00972312791946162,
      "learning_rate": 9.245365279896077e-06,
      "loss": 0.5686,
      "step": 196
    },
    {
      "epoch": 0.2036175710594315,
      "grad_norm": 0.0448437042220098,
      "learning_rate": 9.236485371308642e-06,
      "loss": 0.5354,
      "step": 197
    },
    {
      "epoch": 0.20465116279069767,
      "grad_norm": 0.027371830265790053,
      "learning_rate": 9.227557838721391e-06,
      "loss": 0.7096,
      "step": 198
    },
    {
      "epoch": 0.20568475452196383,
      "grad_norm": 0.01376717212510206,
      "learning_rate": 9.218582782492228e-06,
      "loss": 1.2325,
      "step": 199
    },
    {
      "epoch": 0.20671834625322996,
      "grad_norm": 0.025208342824461025,
      "learning_rate": 9.209560303513296e-06,
      "loss": 0.7258,
      "step": 200
    },
    {
      "epoch": 0.20775193798449612,
      "grad_norm": 0.023860138576171294,
      "learning_rate": 9.200490503209831e-06,
      "loss": 0.596,
      "step": 201
    },
    {
      "epoch": 0.20878552971576228,
      "grad_norm": 0.004348629637496784,
      "learning_rate": 9.191373483539032e-06,
      "loss": 0.5198,
      "step": 202
    },
    {
      "epoch": 0.2098191214470284,
      "grad_norm": 0.01391899178620393,
      "learning_rate": 9.182209346988901e-06,
      "loss": 0.8346,
      "step": 203
    },
    {
      "epoch": 0.21085271317829457,
      "grad_norm": 0.06658461418835225,
      "learning_rate": 9.17299819657711e-06,
      "loss": 0.8635,
      "step": 204
    },
    {
      "epoch": 0.21188630490956073,
      "grad_norm": 0.03231085596121509,
      "learning_rate": 9.163740135849824e-06,
      "loss": 0.9853,
      "step": 205
    },
    {
      "epoch": 0.21291989664082686,
      "grad_norm": 0.019505527706318526,
      "learning_rate": 9.154435268880547e-06,
      "loss": 0.5693,
      "step": 206
    },
    {
      "epoch": 0.21395348837209302,
      "grad_norm": 0.06266480207021098,
      "learning_rate": 9.145083700268955e-06,
      "loss": 1.2663,
      "step": 207
    },
    {
      "epoch": 0.21498708010335918,
      "grad_norm": 0.05936062307669782,
      "learning_rate": 9.135685535139709e-06,
      "loss": 0.8817,
      "step": 208
    },
    {
      "epoch": 0.2160206718346253,
      "grad_norm": 0.022950589610178967,
      "learning_rate": 9.126240879141286e-06,
      "loss": 0.6411,
      "step": 209
    },
    {
      "epoch": 0.21705426356589147,
      "grad_norm": 0.025670419030162726,
      "learning_rate": 9.116749838444778e-06,
      "loss": 0.9189,
      "step": 210
    },
    {
      "epoch": 0.21808785529715763,
      "grad_norm": 0.059657285074143675,
      "learning_rate": 9.107212519742714e-06,
      "loss": 0.7633,
      "step": 211
    },
    {
      "epoch": 0.21912144702842376,
      "grad_norm": 0.09059261821220524,
      "learning_rate": 9.097629030247846e-06,
      "loss": 0.4377,
      "step": 212
    },
    {
      "epoch": 0.22015503875968992,
      "grad_norm": 0.04863346783391407,
      "learning_rate": 9.087999477691953e-06,
      "loss": 0.6508,
      "step": 213
    },
    {
      "epoch": 0.22118863049095608,
      "grad_norm": 0.008703483189895118,
      "learning_rate": 9.078323970324626e-06,
      "loss": 0.3863,
      "step": 214
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.056317924492292545,
      "learning_rate": 9.06860261691205e-06,
      "loss": 0.919,
      "step": 215
    },
    {
      "epoch": 0.22325581395348837,
      "grad_norm": 0.013052710495393343,
      "learning_rate": 9.058835526735788e-06,
      "loss": 1.2502,
      "step": 216
    },
    {
      "epoch": 0.22428940568475453,
      "grad_norm": 0.004326147526087374,
      "learning_rate": 9.049022809591546e-06,
      "loss": 0.5343,
      "step": 217
    },
    {
      "epoch": 0.22532299741602066,
      "grad_norm": 0.006135603589900725,
      "learning_rate": 9.039164575787937e-06,
      "loss": 0.5402,
      "step": 218
    },
    {
      "epoch": 0.22635658914728682,
      "grad_norm": 0.019090849246119757,
      "learning_rate": 9.029260936145252e-06,
      "loss": 0.9132,
      "step": 219
    },
    {
      "epoch": 0.22739018087855298,
      "grad_norm": 0.02691653993430469,
      "learning_rate": 9.019312001994203e-06,
      "loss": 0.4801,
      "step": 220
    },
    {
      "epoch": 0.2284237726098191,
      "grad_norm": 0.01918427295307592,
      "learning_rate": 9.009317885174672e-06,
      "loss": 1.2181,
      "step": 221
    },
    {
      "epoch": 0.22945736434108527,
      "grad_norm": 0.027554614002697222,
      "learning_rate": 8.999278698034462e-06,
      "loss": 1.0221,
      "step": 222
    },
    {
      "epoch": 0.23049095607235143,
      "grad_norm": 0.016185639010448363,
      "learning_rate": 8.989194553428028e-06,
      "loss": 0.5952,
      "step": 223
    },
    {
      "epoch": 0.23152454780361756,
      "grad_norm": 0.007053727347957941,
      "learning_rate": 8.979065564715209e-06,
      "loss": 0.732,
      "step": 224
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 0.029493371609791846,
      "learning_rate": 8.968891845759955e-06,
      "loss": 0.8131,
      "step": 225
    },
    {
      "epoch": 0.23359173126614988,
      "grad_norm": 0.019419640085101688,
      "learning_rate": 8.958673510929046e-06,
      "loss": 1.0094,
      "step": 226
    },
    {
      "epoch": 0.234625322997416,
      "grad_norm": 0.004939171298142091,
      "learning_rate": 8.948410675090807e-06,
      "loss": 0.5828,
      "step": 227
    },
    {
      "epoch": 0.23565891472868217,
      "grad_norm": 0.022769307077674168,
      "learning_rate": 8.938103453613814e-06,
      "loss": 0.7133,
      "step": 228
    },
    {
      "epoch": 0.23669250645994833,
      "grad_norm": 0.009387312662167645,
      "learning_rate": 8.927751962365603e-06,
      "loss": 0.7452,
      "step": 229
    },
    {
      "epoch": 0.23772609819121446,
      "grad_norm": 0.005716346929830633,
      "learning_rate": 8.917356317711359e-06,
      "loss": 0.7965,
      "step": 230
    },
    {
      "epoch": 0.23875968992248062,
      "grad_norm": 0.021429105348464574,
      "learning_rate": 8.906916636512618e-06,
      "loss": 0.7036,
      "step": 231
    },
    {
      "epoch": 0.23979328165374678,
      "grad_norm": 0.014326380698753916,
      "learning_rate": 8.89643303612595e-06,
      "loss": 0.9994,
      "step": 232
    },
    {
      "epoch": 0.2408268733850129,
      "grad_norm": 0.013104091333791243,
      "learning_rate": 8.885905634401629e-06,
      "loss": 0.5784,
      "step": 233
    },
    {
      "epoch": 0.24186046511627907,
      "grad_norm": 0.03665635333924465,
      "learning_rate": 8.875334549682322e-06,
      "loss": 0.398,
      "step": 234
    },
    {
      "epoch": 0.24289405684754523,
      "grad_norm": 0.010720581915937645,
      "learning_rate": 8.864719900801755e-06,
      "loss": 0.4964,
      "step": 235
    },
    {
      "epoch": 0.24392764857881136,
      "grad_norm": 0.016675861124065116,
      "learning_rate": 8.854061807083376e-06,
      "loss": 0.4495,
      "step": 236
    },
    {
      "epoch": 0.24496124031007752,
      "grad_norm": 0.01994733104283364,
      "learning_rate": 8.84336038833901e-06,
      "loss": 0.5189,
      "step": 237
    },
    {
      "epoch": 0.24599483204134368,
      "grad_norm": 0.7057085900286534,
      "learning_rate": 8.832615764867521e-06,
      "loss": 0.6756,
      "step": 238
    },
    {
      "epoch": 0.2470284237726098,
      "grad_norm": 0.021409456905475494,
      "learning_rate": 8.821828057453448e-06,
      "loss": 0.9047,
      "step": 239
    },
    {
      "epoch": 0.24806201550387597,
      "grad_norm": 0.03477332918662102,
      "learning_rate": 8.810997387365656e-06,
      "loss": 0.8499,
      "step": 240
    },
    {
      "epoch": 0.24909560723514212,
      "grad_norm": 0.017057725045423664,
      "learning_rate": 8.800123876355976e-06,
      "loss": 0.7113,
      "step": 241
    },
    {
      "epoch": 0.2501291989664083,
      "grad_norm": 0.03237499631874885,
      "learning_rate": 8.789207646657823e-06,
      "loss": 0.6451,
      "step": 242
    },
    {
      "epoch": 0.25116279069767444,
      "grad_norm": 0.030433718328767626,
      "learning_rate": 8.778248820984829e-06,
      "loss": 0.7878,
      "step": 243
    },
    {
      "epoch": 0.25219638242894055,
      "grad_norm": 0.013152906932054858,
      "learning_rate": 8.767247522529473e-06,
      "loss": 0.5596,
      "step": 244
    },
    {
      "epoch": 0.2532299741602067,
      "grad_norm": 0.012622467896971874,
      "learning_rate": 8.75620387496168e-06,
      "loss": 0.4429,
      "step": 245
    },
    {
      "epoch": 0.25426356589147286,
      "grad_norm": 0.02512700310633117,
      "learning_rate": 8.74511800242744e-06,
      "loss": 0.7501,
      "step": 246
    },
    {
      "epoch": 0.255297157622739,
      "grad_norm": 0.0012554591916005735,
      "learning_rate": 8.733990029547408e-06,
      "loss": 0.8863,
      "step": 247
    },
    {
      "epoch": 0.2563307493540052,
      "grad_norm": 0.017537368874946,
      "learning_rate": 8.72282008141551e-06,
      "loss": 0.8266,
      "step": 248
    },
    {
      "epoch": 0.25736434108527134,
      "grad_norm": 0.011104763009538507,
      "learning_rate": 8.71160828359753e-06,
      "loss": 0.6835,
      "step": 249
    },
    {
      "epoch": 0.25839793281653745,
      "grad_norm": 0.009815236737431102,
      "learning_rate": 8.7003547621297e-06,
      "loss": 0.9261,
      "step": 250
    },
    {
      "epoch": 0.2594315245478036,
      "grad_norm": 0.00870558888079472,
      "learning_rate": 8.689059643517285e-06,
      "loss": 0.3867,
      "step": 251
    },
    {
      "epoch": 0.26046511627906976,
      "grad_norm": 0.0026826494043887253,
      "learning_rate": 8.677723054733163e-06,
      "loss": 0.9743,
      "step": 252
    },
    {
      "epoch": 0.2614987080103359,
      "grad_norm": 0.01095035589445595,
      "learning_rate": 8.666345123216387e-06,
      "loss": 0.6548,
      "step": 253
    },
    {
      "epoch": 0.2625322997416021,
      "grad_norm": 0.010948308867895851,
      "learning_rate": 8.654925976870766e-06,
      "loss": 1.1087,
      "step": 254
    },
    {
      "epoch": 0.26356589147286824,
      "grad_norm": 0.018026955362531117,
      "learning_rate": 8.64346574406342e-06,
      "loss": 0.993,
      "step": 255
    },
    {
      "epoch": 0.26459948320413434,
      "grad_norm": 0.00658806042654425,
      "learning_rate": 8.631964553623336e-06,
      "loss": 0.6475,
      "step": 256
    },
    {
      "epoch": 0.2656330749354005,
      "grad_norm": 0.004084675768487745,
      "learning_rate": 8.620422534839925e-06,
      "loss": 0.7668,
      "step": 257
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.0127875051460257,
      "learning_rate": 8.608839817461565e-06,
      "loss": 0.8424,
      "step": 258
    },
    {
      "epoch": 0.2677002583979328,
      "grad_norm": 0.01107652911515069,
      "learning_rate": 8.597216531694136e-06,
      "loss": 0.816,
      "step": 259
    },
    {
      "epoch": 0.268733850129199,
      "grad_norm": 0.010534807390693813,
      "learning_rate": 8.585552808199577e-06,
      "loss": 0.9236,
      "step": 260
    },
    {
      "epoch": 0.26976744186046514,
      "grad_norm": 0.004415086627851708,
      "learning_rate": 8.57384877809439e-06,
      "loss": 0.8157,
      "step": 261
    },
    {
      "epoch": 0.27080103359173124,
      "grad_norm": 0.006125300228982346,
      "learning_rate": 8.562104572948185e-06,
      "loss": 0.384,
      "step": 262
    },
    {
      "epoch": 0.2718346253229974,
      "grad_norm": 0.03289252738757519,
      "learning_rate": 8.550320324782198e-06,
      "loss": 0.4705,
      "step": 263
    },
    {
      "epoch": 0.27286821705426356,
      "grad_norm": 0.015276078793441526,
      "learning_rate": 8.538496166067798e-06,
      "loss": 0.4077,
      "step": 264
    },
    {
      "epoch": 0.2739018087855297,
      "grad_norm": 0.0432377425897138,
      "learning_rate": 8.526632229725012e-06,
      "loss": 0.7429,
      "step": 265
    },
    {
      "epoch": 0.2749354005167959,
      "grad_norm": 0.012739260695027425,
      "learning_rate": 8.514728649121017e-06,
      "loss": 0.8007,
      "step": 266
    },
    {
      "epoch": 0.27596899224806204,
      "grad_norm": 0.01611807702071134,
      "learning_rate": 8.50278555806865e-06,
      "loss": 0.4674,
      "step": 267
    },
    {
      "epoch": 0.27700258397932814,
      "grad_norm": 0.008070552562490414,
      "learning_rate": 8.490803090824895e-06,
      "loss": 1.0095,
      "step": 268
    },
    {
      "epoch": 0.2780361757105943,
      "grad_norm": 0.01245321817309244,
      "learning_rate": 8.478781382089387e-06,
      "loss": 0.8956,
      "step": 269
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 0.011726748347443113,
      "learning_rate": 8.466720567002887e-06,
      "loss": 0.6895,
      "step": 270
    },
    {
      "epoch": 0.2801033591731266,
      "grad_norm": 0.005292481268583541,
      "learning_rate": 8.454620781145761e-06,
      "loss": 0.8383,
      "step": 271
    },
    {
      "epoch": 0.2811369509043928,
      "grad_norm": 0.012062842232738685,
      "learning_rate": 8.442482160536469e-06,
      "loss": 0.7233,
      "step": 272
    },
    {
      "epoch": 0.28217054263565894,
      "grad_norm": 0.035075841318575364,
      "learning_rate": 8.430304841630024e-06,
      "loss": 0.7745,
      "step": 273
    },
    {
      "epoch": 0.28320413436692504,
      "grad_norm": 0.01621737213548447,
      "learning_rate": 8.418088961316459e-06,
      "loss": 0.9643,
      "step": 274
    },
    {
      "epoch": 0.2842377260981912,
      "grad_norm": 0.13770137238981514,
      "learning_rate": 8.405834656919295e-06,
      "loss": 0.5721,
      "step": 275
    },
    {
      "epoch": 0.28527131782945736,
      "grad_norm": 0.009851743707365591,
      "learning_rate": 8.393542066193994e-06,
      "loss": 0.9355,
      "step": 276
    },
    {
      "epoch": 0.2863049095607235,
      "grad_norm": 0.03185248807282835,
      "learning_rate": 8.381211327326403e-06,
      "loss": 0.6489,
      "step": 277
    },
    {
      "epoch": 0.2873385012919897,
      "grad_norm": 0.003400971572838479,
      "learning_rate": 8.368842578931214e-06,
      "loss": 0.5619,
      "step": 278
    },
    {
      "epoch": 0.28837209302325584,
      "grad_norm": 0.017968304843218753,
      "learning_rate": 8.356435960050398e-06,
      "loss": 0.3446,
      "step": 279
    },
    {
      "epoch": 0.28940568475452194,
      "grad_norm": 0.01357385951836592,
      "learning_rate": 8.34399161015164e-06,
      "loss": 0.5631,
      "step": 280
    },
    {
      "epoch": 0.2904392764857881,
      "grad_norm": 0.10987377474548056,
      "learning_rate": 8.331509669126778e-06,
      "loss": 0.4986,
      "step": 281
    },
    {
      "epoch": 0.29147286821705426,
      "grad_norm": 0.013289043438300894,
      "learning_rate": 8.318990277290224e-06,
      "loss": 0.8155,
      "step": 282
    },
    {
      "epoch": 0.2925064599483204,
      "grad_norm": 0.02724831490571318,
      "learning_rate": 8.306433575377388e-06,
      "loss": 0.6345,
      "step": 283
    },
    {
      "epoch": 0.2935400516795866,
      "grad_norm": 0.004804539734257983,
      "learning_rate": 8.293839704543103e-06,
      "loss": 0.7433,
      "step": 284
    },
    {
      "epoch": 0.29457364341085274,
      "grad_norm": 0.03623587061850478,
      "learning_rate": 8.281208806360028e-06,
      "loss": 0.5445,
      "step": 285
    },
    {
      "epoch": 0.29560723514211884,
      "grad_norm": 0.005070069391571561,
      "learning_rate": 8.268541022817058e-06,
      "loss": 0.4421,
      "step": 286
    },
    {
      "epoch": 0.296640826873385,
      "grad_norm": 0.010976296958017995,
      "learning_rate": 8.255836496317739e-06,
      "loss": 0.9924,
      "step": 287
    },
    {
      "epoch": 0.29767441860465116,
      "grad_norm": 0.007483812700653088,
      "learning_rate": 8.243095369678653e-06,
      "loss": 0.4191,
      "step": 288
    },
    {
      "epoch": 0.2987080103359173,
      "grad_norm": 0.06771954884339798,
      "learning_rate": 8.230317786127822e-06,
      "loss": 0.8462,
      "step": 289
    },
    {
      "epoch": 0.2997416020671835,
      "grad_norm": 0.01061239920130858,
      "learning_rate": 8.217503889303088e-06,
      "loss": 0.4135,
      "step": 290
    },
    {
      "epoch": 0.30077519379844964,
      "grad_norm": 0.00800392710424375,
      "learning_rate": 8.204653823250516e-06,
      "loss": 0.8297,
      "step": 291
    },
    {
      "epoch": 0.30180878552971574,
      "grad_norm": 0.015360784142070882,
      "learning_rate": 8.191767732422754e-06,
      "loss": 0.4854,
      "step": 292
    },
    {
      "epoch": 0.3028423772609819,
      "grad_norm": 0.009426769748860628,
      "learning_rate": 8.17884576167742e-06,
      "loss": 0.6999,
      "step": 293
    },
    {
      "epoch": 0.30387596899224806,
      "grad_norm": 0.008154700146080874,
      "learning_rate": 8.165888056275478e-06,
      "loss": 0.372,
      "step": 294
    },
    {
      "epoch": 0.3049095607235142,
      "grad_norm": 0.003741755388621327,
      "learning_rate": 8.152894761879593e-06,
      "loss": 0.9781,
      "step": 295
    },
    {
      "epoch": 0.3059431524547804,
      "grad_norm": 0.03314340319472921,
      "learning_rate": 8.1398660245525e-06,
      "loss": 0.7189,
      "step": 296
    },
    {
      "epoch": 0.30697674418604654,
      "grad_norm": 0.0043097517071250925,
      "learning_rate": 8.126801990755371e-06,
      "loss": 0.7041,
      "step": 297
    },
    {
      "epoch": 0.30801033591731264,
      "grad_norm": 0.0147645408246627,
      "learning_rate": 8.113702807346147e-06,
      "loss": 0.6921,
      "step": 298
    },
    {
      "epoch": 0.3090439276485788,
      "grad_norm": 0.040485500550102096,
      "learning_rate": 8.100568621577907e-06,
      "loss": 0.3761,
      "step": 299
    },
    {
      "epoch": 0.31007751937984496,
      "grad_norm": 0.0019354774663355987,
      "learning_rate": 8.087399581097205e-06,
      "loss": 0.5642,
      "step": 300
    },
    {
      "epoch": 0.3111111111111111,
      "grad_norm": 0.006167650340911832,
      "learning_rate": 8.074195833942405e-06,
      "loss": 0.6067,
      "step": 301
    },
    {
      "epoch": 0.3121447028423773,
      "grad_norm": 0.006139448424181857,
      "learning_rate": 8.060957528542032e-06,
      "loss": 0.6789,
      "step": 302
    },
    {
      "epoch": 0.31317829457364343,
      "grad_norm": 0.016843741386875936,
      "learning_rate": 8.047684813713086e-06,
      "loss": 0.5894,
      "step": 303
    },
    {
      "epoch": 0.31421188630490954,
      "grad_norm": 0.03491200858851756,
      "learning_rate": 8.03437783865938e-06,
      "loss": 0.9153,
      "step": 304
    },
    {
      "epoch": 0.3152454780361757,
      "grad_norm": 0.016584072345741992,
      "learning_rate": 8.021036752969859e-06,
      "loss": 1.4972,
      "step": 305
    },
    {
      "epoch": 0.31627906976744186,
      "grad_norm": 0.02360601294451115,
      "learning_rate": 8.007661706616919e-06,
      "loss": 0.9334,
      "step": 306
    },
    {
      "epoch": 0.317312661498708,
      "grad_norm": 0.006462197903116415,
      "learning_rate": 7.99425284995472e-06,
      "loss": 0.6216,
      "step": 307
    },
    {
      "epoch": 0.3183462532299742,
      "grad_norm": 0.012023339021971507,
      "learning_rate": 7.980810333717499e-06,
      "loss": 1.0416,
      "step": 308
    },
    {
      "epoch": 0.31937984496124033,
      "grad_norm": 0.009741451885717125,
      "learning_rate": 7.967334309017876e-06,
      "loss": 0.6637,
      "step": 309
    },
    {
      "epoch": 0.32041343669250644,
      "grad_norm": 0.004744076969091761,
      "learning_rate": 7.953824927345146e-06,
      "loss": 0.8739,
      "step": 310
    },
    {
      "epoch": 0.3214470284237726,
      "grad_norm": 0.008865286974752879,
      "learning_rate": 7.940282340563586e-06,
      "loss": 0.4163,
      "step": 311
    },
    {
      "epoch": 0.32248062015503876,
      "grad_norm": 0.020557633376467342,
      "learning_rate": 7.92670670091075e-06,
      "loss": 0.912,
      "step": 312
    },
    {
      "epoch": 0.3235142118863049,
      "grad_norm": 0.055738180858069336,
      "learning_rate": 7.913098160995742e-06,
      "loss": 0.6081,
      "step": 313
    },
    {
      "epoch": 0.3245478036175711,
      "grad_norm": 0.005665356558095313,
      "learning_rate": 7.899456873797519e-06,
      "loss": 1.0076,
      "step": 314
    },
    {
      "epoch": 0.32558139534883723,
      "grad_norm": 0.0081299290477942,
      "learning_rate": 7.885782992663162e-06,
      "loss": 0.9568,
      "step": 315
    },
    {
      "epoch": 0.32661498708010334,
      "grad_norm": 0.007381751766665286,
      "learning_rate": 7.87207667130615e-06,
      "loss": 0.8491,
      "step": 316
    },
    {
      "epoch": 0.3276485788113695,
      "grad_norm": 0.007038104847270785,
      "learning_rate": 7.858338063804638e-06,
      "loss": 0.6492,
      "step": 317
    },
    {
      "epoch": 0.32868217054263565,
      "grad_norm": 0.005474398946100628,
      "learning_rate": 7.84456732459972e-06,
      "loss": 0.5055,
      "step": 318
    },
    {
      "epoch": 0.3297157622739018,
      "grad_norm": 0.023288290982555998,
      "learning_rate": 7.830764608493697e-06,
      "loss": 1.182,
      "step": 319
    },
    {
      "epoch": 0.330749354005168,
      "grad_norm": 0.013193807341182287,
      "learning_rate": 7.816930070648335e-06,
      "loss": 0.7203,
      "step": 320
    },
    {
      "epoch": 0.33178294573643413,
      "grad_norm": 0.006296091259566541,
      "learning_rate": 7.803063866583119e-06,
      "loss": 0.4248,
      "step": 321
    },
    {
      "epoch": 0.33281653746770024,
      "grad_norm": 0.014934192031651573,
      "learning_rate": 7.789166152173508e-06,
      "loss": 0.7943,
      "step": 322
    },
    {
      "epoch": 0.3338501291989664,
      "grad_norm": 0.09596690901692774,
      "learning_rate": 7.775237083649182e-06,
      "loss": 0.6135,
      "step": 323
    },
    {
      "epoch": 0.33488372093023255,
      "grad_norm": 0.028690179793865813,
      "learning_rate": 7.761276817592283e-06,
      "loss": 0.445,
      "step": 324
    },
    {
      "epoch": 0.3359173126614987,
      "grad_norm": 0.026053550113641938,
      "learning_rate": 7.747285510935654e-06,
      "loss": 1.1765,
      "step": 325
    },
    {
      "epoch": 0.33695090439276487,
      "grad_norm": 0.00647173472300007,
      "learning_rate": 7.733263320961087e-06,
      "loss": 0.4008,
      "step": 326
    },
    {
      "epoch": 0.33798449612403103,
      "grad_norm": 0.0049292977888077665,
      "learning_rate": 7.719210405297537e-06,
      "loss": 0.3822,
      "step": 327
    },
    {
      "epoch": 0.33901808785529713,
      "grad_norm": 0.01309220866108371,
      "learning_rate": 7.705126921919358e-06,
      "loss": 0.6897,
      "step": 328
    },
    {
      "epoch": 0.3400516795865633,
      "grad_norm": 0.029236288983560777,
      "learning_rate": 7.691013029144535e-06,
      "loss": 0.4641,
      "step": 329
    },
    {
      "epoch": 0.34108527131782945,
      "grad_norm": 0.019258360664585188,
      "learning_rate": 7.676868885632893e-06,
      "loss": 1.0071,
      "step": 330
    },
    {
      "epoch": 0.3421188630490956,
      "grad_norm": 0.0225241578943125,
      "learning_rate": 7.662694650384315e-06,
      "loss": 0.6181,
      "step": 331
    },
    {
      "epoch": 0.34315245478036177,
      "grad_norm": 0.014206415885129355,
      "learning_rate": 7.648490482736959e-06,
      "loss": 0.853,
      "step": 332
    },
    {
      "epoch": 0.34418604651162793,
      "grad_norm": 0.02897365312203941,
      "learning_rate": 7.634256542365468e-06,
      "loss": 1.0363,
      "step": 333
    },
    {
      "epoch": 0.34521963824289403,
      "grad_norm": 0.014294971929582172,
      "learning_rate": 7.6199929892791666e-06,
      "loss": 1.0074,
      "step": 334
    },
    {
      "epoch": 0.3462532299741602,
      "grad_norm": 0.0071463474003256,
      "learning_rate": 7.60569998382027e-06,
      "loss": 0.7751,
      "step": 335
    },
    {
      "epoch": 0.34728682170542635,
      "grad_norm": 0.028929591346858536,
      "learning_rate": 7.591377686662081e-06,
      "loss": 0.8416,
      "step": 336
    },
    {
      "epoch": 0.3483204134366925,
      "grad_norm": 0.005651011689065906,
      "learning_rate": 7.577026258807181e-06,
      "loss": 0.9026,
      "step": 337
    },
    {
      "epoch": 0.34935400516795867,
      "grad_norm": 0.020982411217975495,
      "learning_rate": 7.562645861585615e-06,
      "loss": 0.4778,
      "step": 338
    },
    {
      "epoch": 0.35038759689922483,
      "grad_norm": 0.04961677757213418,
      "learning_rate": 7.548236656653095e-06,
      "loss": 0.9667,
      "step": 339
    },
    {
      "epoch": 0.35142118863049093,
      "grad_norm": 0.00495827459973854,
      "learning_rate": 7.533798805989165e-06,
      "loss": 0.9248,
      "step": 340
    },
    {
      "epoch": 0.3524547803617571,
      "grad_norm": 0.02214374888204119,
      "learning_rate": 7.519332471895384e-06,
      "loss": 1.0824,
      "step": 341
    },
    {
      "epoch": 0.35348837209302325,
      "grad_norm": 0.005286144996712277,
      "learning_rate": 7.504837816993513e-06,
      "loss": 0.6721,
      "step": 342
    },
    {
      "epoch": 0.3545219638242894,
      "grad_norm": 0.07030853036866515,
      "learning_rate": 7.490315004223672e-06,
      "loss": 1.1212,
      "step": 343
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.019589111956018684,
      "learning_rate": 7.475764196842516e-06,
      "loss": 1.0242,
      "step": 344
    },
    {
      "epoch": 0.35658914728682173,
      "grad_norm": 0.0228510672825355,
      "learning_rate": 7.4611855584214e-06,
      "loss": 0.4164,
      "step": 345
    },
    {
      "epoch": 0.35762273901808783,
      "grad_norm": 0.01293372717838685,
      "learning_rate": 7.446579252844536e-06,
      "loss": 0.6629,
      "step": 346
    },
    {
      "epoch": 0.358656330749354,
      "grad_norm": 0.012392336795476511,
      "learning_rate": 7.431945444307157e-06,
      "loss": 1.0665,
      "step": 347
    },
    {
      "epoch": 0.35968992248062015,
      "grad_norm": 0.005791093626999456,
      "learning_rate": 7.417284297313665e-06,
      "loss": 0.5063,
      "step": 348
    },
    {
      "epoch": 0.3607235142118863,
      "grad_norm": 0.004513882152072312,
      "learning_rate": 7.402595976675785e-06,
      "loss": 0.4099,
      "step": 349
    },
    {
      "epoch": 0.36175710594315247,
      "grad_norm": 0.022376016180682672,
      "learning_rate": 7.387880647510709e-06,
      "loss": 1.0233,
      "step": 350
    },
    {
      "epoch": 0.3627906976744186,
      "grad_norm": 0.0344632348444727,
      "learning_rate": 7.37313847523925e-06,
      "loss": 0.4913,
      "step": 351
    },
    {
      "epoch": 0.36382428940568473,
      "grad_norm": 0.003445131346454811,
      "learning_rate": 7.358369625583966e-06,
      "loss": 0.9605,
      "step": 352
    },
    {
      "epoch": 0.3648578811369509,
      "grad_norm": 0.02384540189425066,
      "learning_rate": 7.343574264567311e-06,
      "loss": 1.4086,
      "step": 353
    },
    {
      "epoch": 0.36589147286821705,
      "grad_norm": 0.004809197548604081,
      "learning_rate": 7.3287525585097615e-06,
      "loss": 0.4842,
      "step": 354
    },
    {
      "epoch": 0.3669250645994832,
      "grad_norm": 0.031123565589591843,
      "learning_rate": 7.313904674027954e-06,
      "loss": 1.281,
      "step": 355
    },
    {
      "epoch": 0.36795865633074937,
      "grad_norm": 0.04891824364075138,
      "learning_rate": 7.299030778032799e-06,
      "loss": 0.7497,
      "step": 356
    },
    {
      "epoch": 0.3689922480620155,
      "grad_norm": 0.014724515830862272,
      "learning_rate": 7.284131037727618e-06,
      "loss": 0.8537,
      "step": 357
    },
    {
      "epoch": 0.37002583979328163,
      "grad_norm": 0.12118056397099414,
      "learning_rate": 7.269205620606259e-06,
      "loss": 0.6666,
      "step": 358
    },
    {
      "epoch": 0.3710594315245478,
      "grad_norm": 0.0179664302218401,
      "learning_rate": 7.2542546944512106e-06,
      "loss": 0.8365,
      "step": 359
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 0.02436997414898339,
      "learning_rate": 7.239278427331718e-06,
      "loss": 0.6227,
      "step": 360
    },
    {
      "epoch": 0.3731266149870801,
      "grad_norm": 0.023950831829555232,
      "learning_rate": 7.224276987601895e-06,
      "loss": 0.8476,
      "step": 361
    },
    {
      "epoch": 0.37416020671834627,
      "grad_norm": 0.012331992703975789,
      "learning_rate": 7.209250543898834e-06,
      "loss": 0.4316,
      "step": 362
    },
    {
      "epoch": 0.3751937984496124,
      "grad_norm": 0.021072752753651534,
      "learning_rate": 7.194199265140701e-06,
      "loss": 1.1905,
      "step": 363
    },
    {
      "epoch": 0.37622739018087853,
      "grad_norm": 0.010410578157731045,
      "learning_rate": 7.179123320524848e-06,
      "loss": 0.4049,
      "step": 364
    },
    {
      "epoch": 0.3772609819121447,
      "grad_norm": 0.032991062438229336,
      "learning_rate": 7.1640228795259025e-06,
      "loss": 0.7922,
      "step": 365
    },
    {
      "epoch": 0.37829457364341085,
      "grad_norm": 0.011078613319413465,
      "learning_rate": 7.148898111893867e-06,
      "loss": 0.7205,
      "step": 366
    },
    {
      "epoch": 0.379328165374677,
      "grad_norm": 0.04376414252839199,
      "learning_rate": 7.133749187652208e-06,
      "loss": 0.5727,
      "step": 367
    },
    {
      "epoch": 0.38036175710594317,
      "grad_norm": 0.04062568416936861,
      "learning_rate": 7.118576277095944e-06,
      "loss": 0.6987,
      "step": 368
    },
    {
      "epoch": 0.3813953488372093,
      "grad_norm": 0.03081676446572152,
      "learning_rate": 7.103379550789741e-06,
      "loss": 0.7791,
      "step": 369
    },
    {
      "epoch": 0.38242894056847543,
      "grad_norm": 0.01940992767789913,
      "learning_rate": 7.088159179565978e-06,
      "loss": 0.8212,
      "step": 370
    },
    {
      "epoch": 0.3834625322997416,
      "grad_norm": 0.024413281905129588,
      "learning_rate": 7.07291533452284e-06,
      "loss": 1.283,
      "step": 371
    },
    {
      "epoch": 0.38449612403100775,
      "grad_norm": 0.017628923028504304,
      "learning_rate": 7.05764818702239e-06,
      "loss": 0.6612,
      "step": 372
    },
    {
      "epoch": 0.3855297157622739,
      "grad_norm": 0.02664533437997738,
      "learning_rate": 7.042357908688646e-06,
      "loss": 0.59,
      "step": 373
    },
    {
      "epoch": 0.38656330749354006,
      "grad_norm": 0.05683194558190877,
      "learning_rate": 7.027044671405643e-06,
      "loss": 0.8984,
      "step": 374
    },
    {
      "epoch": 0.3875968992248062,
      "grad_norm": 0.019539641721689947,
      "learning_rate": 7.0117086473155095e-06,
      "loss": 0.9121,
      "step": 375
    },
    {
      "epoch": 0.3886304909560723,
      "grad_norm": 0.014549720228512723,
      "learning_rate": 6.996350008816532e-06,
      "loss": 0.6129,
      "step": 376
    },
    {
      "epoch": 0.3896640826873385,
      "grad_norm": 0.010089853634539674,
      "learning_rate": 6.980968928561209e-06,
      "loss": 0.8064,
      "step": 377
    },
    {
      "epoch": 0.39069767441860465,
      "grad_norm": 0.016549572192554634,
      "learning_rate": 6.965565579454322e-06,
      "loss": 1.1178,
      "step": 378
    },
    {
      "epoch": 0.3917312661498708,
      "grad_norm": 0.028670313295816124,
      "learning_rate": 6.9501401346509786e-06,
      "loss": 0.9318,
      "step": 379
    },
    {
      "epoch": 0.39276485788113696,
      "grad_norm": 0.04818941575433965,
      "learning_rate": 6.934692767554679e-06,
      "loss": 0.6241,
      "step": 380
    },
    {
      "epoch": 0.3937984496124031,
      "grad_norm": 0.05021868734743847,
      "learning_rate": 6.9192236518153566e-06,
      "loss": 0.4802,
      "step": 381
    },
    {
      "epoch": 0.3948320413436692,
      "grad_norm": 0.03791700470987089,
      "learning_rate": 6.903732961327432e-06,
      "loss": 0.4523,
      "step": 382
    },
    {
      "epoch": 0.3958656330749354,
      "grad_norm": 0.00931022196405789,
      "learning_rate": 6.888220870227853e-06,
      "loss": 0.3795,
      "step": 383
    },
    {
      "epoch": 0.39689922480620154,
      "grad_norm": 0.018174633249550532,
      "learning_rate": 6.872687552894145e-06,
      "loss": 0.6997,
      "step": 384
    },
    {
      "epoch": 0.3979328165374677,
      "grad_norm": 0.02101748102238638,
      "learning_rate": 6.857133183942442e-06,
      "loss": 0.5357,
      "step": 385
    },
    {
      "epoch": 0.39896640826873386,
      "grad_norm": 0.010316129379993325,
      "learning_rate": 6.841557938225527e-06,
      "loss": 0.4334,
      "step": 386
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.04072906398208321,
      "learning_rate": 6.825961990830871e-06,
      "loss": 1.2997,
      "step": 387
    },
    {
      "epoch": 0.4010335917312661,
      "grad_norm": 0.014813116670626823,
      "learning_rate": 6.810345517078657e-06,
      "loss": 0.3407,
      "step": 388
    },
    {
      "epoch": 0.4020671834625323,
      "grad_norm": 0.03307437930127245,
      "learning_rate": 6.794708692519815e-06,
      "loss": 0.8281,
      "step": 389
    },
    {
      "epoch": 0.40310077519379844,
      "grad_norm": 0.013132819683017712,
      "learning_rate": 6.779051692934043e-06,
      "loss": 0.3943,
      "step": 390
    },
    {
      "epoch": 0.4041343669250646,
      "grad_norm": 0.052959600479985644,
      "learning_rate": 6.76337469432784e-06,
      "loss": 0.6076,
      "step": 391
    },
    {
      "epoch": 0.40516795865633076,
      "grad_norm": 0.02951844048145289,
      "learning_rate": 6.747677872932519e-06,
      "loss": 0.7504,
      "step": 392
    },
    {
      "epoch": 0.4062015503875969,
      "grad_norm": 0.016614775289315147,
      "learning_rate": 6.731961405202224e-06,
      "loss": 0.7339,
      "step": 393
    },
    {
      "epoch": 0.407235142118863,
      "grad_norm": 0.031538488116570826,
      "learning_rate": 6.716225467811961e-06,
      "loss": 0.8588,
      "step": 394
    },
    {
      "epoch": 0.4082687338501292,
      "grad_norm": 0.028168487000190066,
      "learning_rate": 6.700470237655597e-06,
      "loss": 0.9797,
      "step": 395
    },
    {
      "epoch": 0.40930232558139534,
      "grad_norm": 0.021108785497032576,
      "learning_rate": 6.684695891843871e-06,
      "loss": 0.7985,
      "step": 396
    },
    {
      "epoch": 0.4103359173126615,
      "grad_norm": 0.006734723357934439,
      "learning_rate": 6.668902607702419e-06,
      "loss": 0.3963,
      "step": 397
    },
    {
      "epoch": 0.41136950904392766,
      "grad_norm": 0.017507004853911937,
      "learning_rate": 6.653090562769764e-06,
      "loss": 1.1782,
      "step": 398
    },
    {
      "epoch": 0.4124031007751938,
      "grad_norm": 0.015855307666841546,
      "learning_rate": 6.637259934795328e-06,
      "loss": 0.7495,
      "step": 399
    },
    {
      "epoch": 0.4134366925064599,
      "grad_norm": 0.020996448936050797,
      "learning_rate": 6.6214109017374306e-06,
      "loss": 0.9322,
      "step": 400
    },
    {
      "epoch": 0.4144702842377261,
      "grad_norm": 0.035250440208424355,
      "learning_rate": 6.605543641761293e-06,
      "loss": 0.4028,
      "step": 401
    },
    {
      "epoch": 0.41550387596899224,
      "grad_norm": 0.022498244162240503,
      "learning_rate": 6.589658333237031e-06,
      "loss": 1.2494,
      "step": 402
    },
    {
      "epoch": 0.4165374677002584,
      "grad_norm": 0.02129917945499703,
      "learning_rate": 6.573755154737651e-06,
      "loss": 0.8523,
      "step": 403
    },
    {
      "epoch": 0.41757105943152456,
      "grad_norm": 0.03563954574783468,
      "learning_rate": 6.5578342850370415e-06,
      "loss": 0.7037,
      "step": 404
    },
    {
      "epoch": 0.4186046511627907,
      "grad_norm": 0.025340026852780365,
      "learning_rate": 6.54189590310797e-06,
      "loss": 0.4871,
      "step": 405
    },
    {
      "epoch": 0.4196382428940568,
      "grad_norm": 0.017063859523512078,
      "learning_rate": 6.525940188120059e-06,
      "loss": 0.9409,
      "step": 406
    },
    {
      "epoch": 0.420671834625323,
      "grad_norm": 0.009032306963271983,
      "learning_rate": 6.509967319437782e-06,
      "loss": 0.5963,
      "step": 407
    },
    {
      "epoch": 0.42170542635658914,
      "grad_norm": 0.009951753472800124,
      "learning_rate": 6.493977476618445e-06,
      "loss": 0.6263,
      "step": 408
    },
    {
      "epoch": 0.4227390180878553,
      "grad_norm": 0.025955352185938172,
      "learning_rate": 6.477970839410166e-06,
      "loss": 1.1226,
      "step": 409
    },
    {
      "epoch": 0.42377260981912146,
      "grad_norm": 0.012873639031204242,
      "learning_rate": 6.461947587749855e-06,
      "loss": 0.7973,
      "step": 410
    },
    {
      "epoch": 0.4248062015503876,
      "grad_norm": 0.02262335500916236,
      "learning_rate": 6.445907901761189e-06,
      "loss": 0.4962,
      "step": 411
    },
    {
      "epoch": 0.4258397932816537,
      "grad_norm": 0.02204553881522071,
      "learning_rate": 6.429851961752597e-06,
      "loss": 0.6614,
      "step": 412
    },
    {
      "epoch": 0.4268733850129199,
      "grad_norm": 0.03832066750805277,
      "learning_rate": 6.413779948215218e-06,
      "loss": 0.7953,
      "step": 413
    },
    {
      "epoch": 0.42790697674418604,
      "grad_norm": 0.015787489437990014,
      "learning_rate": 6.397692041820885e-06,
      "loss": 0.4702,
      "step": 414
    },
    {
      "epoch": 0.4289405684754522,
      "grad_norm": 0.011781407701375422,
      "learning_rate": 6.381588423420085e-06,
      "loss": 0.3792,
      "step": 415
    },
    {
      "epoch": 0.42997416020671836,
      "grad_norm": 0.06633793303578112,
      "learning_rate": 6.365469274039936e-06,
      "loss": 0.7947,
      "step": 416
    },
    {
      "epoch": 0.4310077519379845,
      "grad_norm": 0.05256901699384684,
      "learning_rate": 6.349334774882136e-06,
      "loss": 0.4644,
      "step": 417
    },
    {
      "epoch": 0.4320413436692506,
      "grad_norm": 0.05680285070126902,
      "learning_rate": 6.333185107320945e-06,
      "loss": 0.5095,
      "step": 418
    },
    {
      "epoch": 0.4330749354005168,
      "grad_norm": 0.027128727367527722,
      "learning_rate": 6.317020452901134e-06,
      "loss": 0.8882,
      "step": 419
    },
    {
      "epoch": 0.43410852713178294,
      "grad_norm": 0.04150089696699131,
      "learning_rate": 6.300840993335945e-06,
      "loss": 0.3852,
      "step": 420
    },
    {
      "epoch": 0.4351421188630491,
      "grad_norm": 0.029574398972528466,
      "learning_rate": 6.2846469105050545e-06,
      "loss": 0.7508,
      "step": 421
    },
    {
      "epoch": 0.43617571059431526,
      "grad_norm": 0.04985017634493448,
      "learning_rate": 6.26843838645252e-06,
      "loss": 0.9698,
      "step": 422
    },
    {
      "epoch": 0.4372093023255814,
      "grad_norm": 0.0565054622085898,
      "learning_rate": 6.2522156033847435e-06,
      "loss": 0.6499,
      "step": 423
    },
    {
      "epoch": 0.4382428940568475,
      "grad_norm": 0.042549417347473945,
      "learning_rate": 6.235978743668415e-06,
      "loss": 0.7806,
      "step": 424
    },
    {
      "epoch": 0.4392764857881137,
      "grad_norm": 0.05490225780752476,
      "learning_rate": 6.219727989828466e-06,
      "loss": 1.1882,
      "step": 425
    },
    {
      "epoch": 0.44031007751937984,
      "grad_norm": 0.025677024786103918,
      "learning_rate": 6.203463524546017e-06,
      "loss": 0.8844,
      "step": 426
    },
    {
      "epoch": 0.441343669250646,
      "grad_norm": 0.05999612759280204,
      "learning_rate": 6.187185530656327e-06,
      "loss": 0.8175,
      "step": 427
    },
    {
      "epoch": 0.44237726098191216,
      "grad_norm": 0.01288202385675318,
      "learning_rate": 6.1708941911467335e-06,
      "loss": 0.6224,
      "step": 428
    },
    {
      "epoch": 0.4434108527131783,
      "grad_norm": 0.06372065124845469,
      "learning_rate": 6.154589689154594e-06,
      "loss": 1.2608,
      "step": 429
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.07071309956352403,
      "learning_rate": 6.138272207965238e-06,
      "loss": 0.3307,
      "step": 430
    },
    {
      "epoch": 0.4454780361757106,
      "grad_norm": 0.01096993661397583,
      "learning_rate": 6.121941931009894e-06,
      "loss": 0.3185,
      "step": 431
    },
    {
      "epoch": 0.44651162790697674,
      "grad_norm": 0.28617985560655124,
      "learning_rate": 6.105599041863631e-06,
      "loss": 0.8682,
      "step": 432
    },
    {
      "epoch": 0.4475452196382429,
      "grad_norm": 0.07676944100531895,
      "learning_rate": 6.089243724243303e-06,
      "loss": 0.5193,
      "step": 433
    },
    {
      "epoch": 0.44857881136950906,
      "grad_norm": 0.12663591291044893,
      "learning_rate": 6.072876162005474e-06,
      "loss": 1.0511,
      "step": 434
    },
    {
      "epoch": 0.4496124031007752,
      "grad_norm": 0.03207212408723851,
      "learning_rate": 6.056496539144351e-06,
      "loss": 0.564,
      "step": 435
    },
    {
      "epoch": 0.4506459948320413,
      "grad_norm": 0.0774145748271399,
      "learning_rate": 6.040105039789726e-06,
      "loss": 0.8432,
      "step": 436
    },
    {
      "epoch": 0.4516795865633075,
      "grad_norm": 0.09076803443085356,
      "learning_rate": 6.023701848204893e-06,
      "loss": 1.2133,
      "step": 437
    },
    {
      "epoch": 0.45271317829457364,
      "grad_norm": 0.007291079884666382,
      "learning_rate": 6.007287148784591e-06,
      "loss": 0.4359,
      "step": 438
    },
    {
      "epoch": 0.4537467700258398,
      "grad_norm": 0.027779698402429474,
      "learning_rate": 5.990861126052914e-06,
      "loss": 0.5437,
      "step": 439
    },
    {
      "epoch": 0.45478036175710596,
      "grad_norm": 0.035868890122080284,
      "learning_rate": 5.974423964661249e-06,
      "loss": 1.1165,
      "step": 440
    },
    {
      "epoch": 0.4558139534883721,
      "grad_norm": 0.019802946106540358,
      "learning_rate": 5.957975849386202e-06,
      "loss": 0.5625,
      "step": 441
    },
    {
      "epoch": 0.4568475452196382,
      "grad_norm": 0.011873063831459313,
      "learning_rate": 5.941516965127509e-06,
      "loss": 0.5922,
      "step": 442
    },
    {
      "epoch": 0.4578811369509044,
      "grad_norm": 0.0046189360202636325,
      "learning_rate": 5.925047496905968e-06,
      "loss": 0.8321,
      "step": 443
    },
    {
      "epoch": 0.45891472868217054,
      "grad_norm": 0.010579852867942401,
      "learning_rate": 5.908567629861354e-06,
      "loss": 0.5339,
      "step": 444
    },
    {
      "epoch": 0.4599483204134367,
      "grad_norm": 0.00750605418950693,
      "learning_rate": 5.892077549250341e-06,
      "loss": 1.1516,
      "step": 445
    },
    {
      "epoch": 0.46098191214470285,
      "grad_norm": 0.040185776157958895,
      "learning_rate": 5.8755774404444175e-06,
      "loss": 0.705,
      "step": 446
    },
    {
      "epoch": 0.462015503875969,
      "grad_norm": 0.007988508003748845,
      "learning_rate": 5.8590674889278e-06,
      "loss": 0.5773,
      "step": 447
    },
    {
      "epoch": 0.4630490956072351,
      "grad_norm": 0.010779236689898011,
      "learning_rate": 5.842547880295353e-06,
      "loss": 0.8962,
      "step": 448
    },
    {
      "epoch": 0.4640826873385013,
      "grad_norm": 0.014594057234773247,
      "learning_rate": 5.8260188002505034e-06,
      "loss": 0.604,
      "step": 449
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 0.035369497150471815,
      "learning_rate": 5.809480434603143e-06,
      "loss": 0.4374,
      "step": 450
    },
    {
      "epoch": 0.4661498708010336,
      "grad_norm": 0.06434053190999267,
      "learning_rate": 5.792932969267553e-06,
      "loss": 0.8918,
      "step": 451
    },
    {
      "epoch": 0.46718346253229975,
      "grad_norm": 0.014403763977631233,
      "learning_rate": 5.776376590260306e-06,
      "loss": 1.0082,
      "step": 452
    },
    {
      "epoch": 0.4682170542635659,
      "grad_norm": 0.029601989910457323,
      "learning_rate": 5.759811483698173e-06,
      "loss": 1.0038,
      "step": 453
    },
    {
      "epoch": 0.469250645994832,
      "grad_norm": 0.024481686026953078,
      "learning_rate": 5.743237835796042e-06,
      "loss": 0.8439,
      "step": 454
    },
    {
      "epoch": 0.4702842377260982,
      "grad_norm": 0.01773453261912373,
      "learning_rate": 5.726655832864809e-06,
      "loss": 0.6911,
      "step": 455
    },
    {
      "epoch": 0.47131782945736433,
      "grad_norm": 0.004373920163417571,
      "learning_rate": 5.7100656613093005e-06,
      "loss": 0.7162,
      "step": 456
    },
    {
      "epoch": 0.4723514211886305,
      "grad_norm": 0.039920949836624545,
      "learning_rate": 5.693467507626165e-06,
      "loss": 0.8939,
      "step": 457
    },
    {
      "epoch": 0.47338501291989665,
      "grad_norm": 0.039291919266743254,
      "learning_rate": 5.67686155840178e-06,
      "loss": 0.6677,
      "step": 458
    },
    {
      "epoch": 0.4744186046511628,
      "grad_norm": 0.0927752688943313,
      "learning_rate": 5.660248000310162e-06,
      "loss": 0.8787,
      "step": 459
    },
    {
      "epoch": 0.4754521963824289,
      "grad_norm": 0.007300563402303619,
      "learning_rate": 5.643627020110855e-06,
      "loss": 0.8,
      "step": 460
    },
    {
      "epoch": 0.4764857881136951,
      "grad_norm": 0.03665925266808231,
      "learning_rate": 5.626998804646842e-06,
      "loss": 0.4528,
      "step": 461
    },
    {
      "epoch": 0.47751937984496123,
      "grad_norm": 0.013129088304823665,
      "learning_rate": 5.610363540842435e-06,
      "loss": 0.6275,
      "step": 462
    },
    {
      "epoch": 0.4785529715762274,
      "grad_norm": 0.006465487613079777,
      "learning_rate": 5.593721415701189e-06,
      "loss": 0.4735,
      "step": 463
    },
    {
      "epoch": 0.47958656330749355,
      "grad_norm": 0.038401686707533085,
      "learning_rate": 5.577072616303779e-06,
      "loss": 0.94,
      "step": 464
    },
    {
      "epoch": 0.4806201550387597,
      "grad_norm": 0.009548773908298652,
      "learning_rate": 5.560417329805916e-06,
      "loss": 0.5857,
      "step": 465
    },
    {
      "epoch": 0.4816537467700258,
      "grad_norm": 0.010652924115969013,
      "learning_rate": 5.543755743436231e-06,
      "loss": 0.6081,
      "step": 466
    },
    {
      "epoch": 0.482687338501292,
      "grad_norm": 0.020346594828861947,
      "learning_rate": 5.527088044494176e-06,
      "loss": 0.807,
      "step": 467
    },
    {
      "epoch": 0.48372093023255813,
      "grad_norm": 0.01303680214853511,
      "learning_rate": 5.510414420347918e-06,
      "loss": 0.5996,
      "step": 468
    },
    {
      "epoch": 0.4847545219638243,
      "grad_norm": 0.0694886944147393,
      "learning_rate": 5.493735058432227e-06,
      "loss": 1.3145,
      "step": 469
    },
    {
      "epoch": 0.48578811369509045,
      "grad_norm": 0.006531277724996309,
      "learning_rate": 5.477050146246379e-06,
      "loss": 0.3027,
      "step": 470
    },
    {
      "epoch": 0.4868217054263566,
      "grad_norm": 0.0044254609700046495,
      "learning_rate": 5.4603598713520354e-06,
      "loss": 0.3973,
      "step": 471
    },
    {
      "epoch": 0.4878552971576227,
      "grad_norm": 0.011295033144426552,
      "learning_rate": 5.443664421371153e-06,
      "loss": 0.6397,
      "step": 472
    },
    {
      "epoch": 0.4888888888888889,
      "grad_norm": 0.019770910725192336,
      "learning_rate": 5.426963983983853e-06,
      "loss": 0.9434,
      "step": 473
    },
    {
      "epoch": 0.48992248062015503,
      "grad_norm": 0.015037690357313206,
      "learning_rate": 5.410258746926328e-06,
      "loss": 0.6377,
      "step": 474
    },
    {
      "epoch": 0.4909560723514212,
      "grad_norm": 0.01448756911586595,
      "learning_rate": 5.393548897988724e-06,
      "loss": 0.8847,
      "step": 475
    },
    {
      "epoch": 0.49198966408268735,
      "grad_norm": 0.006241180726666852,
      "learning_rate": 5.376834625013031e-06,
      "loss": 0.3532,
      "step": 476
    },
    {
      "epoch": 0.4930232558139535,
      "grad_norm": 0.0057308962868511315,
      "learning_rate": 5.360116115890972e-06,
      "loss": 1.0043,
      "step": 477
    },
    {
      "epoch": 0.4940568475452196,
      "grad_norm": 0.0032690321869053296,
      "learning_rate": 5.343393558561888e-06,
      "loss": 0.8414,
      "step": 478
    },
    {
      "epoch": 0.49509043927648577,
      "grad_norm": 0.0067810108944598565,
      "learning_rate": 5.3266671410106306e-06,
      "loss": 0.4405,
      "step": 479
    },
    {
      "epoch": 0.49612403100775193,
      "grad_norm": 0.02344044375243021,
      "learning_rate": 5.309937051265443e-06,
      "loss": 0.9454,
      "step": 480
    },
    {
      "epoch": 0.4971576227390181,
      "grad_norm": 0.013915356178603587,
      "learning_rate": 5.293203477395851e-06,
      "loss": 0.5325,
      "step": 481
    },
    {
      "epoch": 0.49819121447028425,
      "grad_norm": 0.009386347270505833,
      "learning_rate": 5.276466607510544e-06,
      "loss": 0.5512,
      "step": 482
    },
    {
      "epoch": 0.4992248062015504,
      "grad_norm": 0.005037701075127895,
      "learning_rate": 5.259726629755268e-06,
      "loss": 0.9983,
      "step": 483
    },
    {
      "epoch": 0.5002583979328166,
      "grad_norm": 0.008265615385322772,
      "learning_rate": 5.2429837323107005e-06,
      "loss": 0.5858,
      "step": 484
    },
    {
      "epoch": 0.5012919896640827,
      "grad_norm": 0.0067421378252580915,
      "learning_rate": 5.226238103390343e-06,
      "loss": 0.6612,
      "step": 485
    },
    {
      "epoch": 0.5023255813953489,
      "grad_norm": 0.0044661374953286264,
      "learning_rate": 5.209489931238405e-06,
      "loss": 0.8252,
      "step": 486
    },
    {
      "epoch": 0.5033591731266149,
      "grad_norm": 0.009250731844578558,
      "learning_rate": 5.192739404127679e-06,
      "loss": 0.685,
      "step": 487
    },
    {
      "epoch": 0.5043927648578811,
      "grad_norm": 0.007370939193751542,
      "learning_rate": 5.175986710357439e-06,
      "loss": 0.4972,
      "step": 488
    },
    {
      "epoch": 0.5054263565891473,
      "grad_norm": 0.014483386863447748,
      "learning_rate": 5.159232038251305e-06,
      "loss": 0.8895,
      "step": 489
    },
    {
      "epoch": 0.5064599483204134,
      "grad_norm": 0.0030685672750464,
      "learning_rate": 5.142475576155146e-06,
      "loss": 1.23,
      "step": 490
    },
    {
      "epoch": 0.5074935400516796,
      "grad_norm": 0.005042593965957031,
      "learning_rate": 5.125717512434947e-06,
      "loss": 0.6903,
      "step": 491
    },
    {
      "epoch": 0.5085271317829457,
      "grad_norm": 0.004055264881986794,
      "learning_rate": 5.108958035474703e-06,
      "loss": 1.5122,
      "step": 492
    },
    {
      "epoch": 0.5095607235142119,
      "grad_norm": 0.010351092509396215,
      "learning_rate": 5.092197333674286e-06,
      "loss": 0.741,
      "step": 493
    },
    {
      "epoch": 0.510594315245478,
      "grad_norm": 0.002436070123780222,
      "learning_rate": 5.075435595447346e-06,
      "loss": 0.7222,
      "step": 494
    },
    {
      "epoch": 0.5116279069767442,
      "grad_norm": 0.028073474756643256,
      "learning_rate": 5.0586730092191835e-06,
      "loss": 0.8669,
      "step": 495
    },
    {
      "epoch": 0.5126614987080104,
      "grad_norm": 0.011155489712867101,
      "learning_rate": 5.041909763424625e-06,
      "loss": 0.4761,
      "step": 496
    },
    {
      "epoch": 0.5136950904392765,
      "grad_norm": 0.014794230804008004,
      "learning_rate": 5.0251460465059175e-06,
      "loss": 0.7386,
      "step": 497
    },
    {
      "epoch": 0.5147286821705427,
      "grad_norm": 0.013295929523544648,
      "learning_rate": 5.0083820469106016e-06,
      "loss": 0.9238,
      "step": 498
    },
    {
      "epoch": 0.5157622739018087,
      "grad_norm": 0.005519871430231462,
      "learning_rate": 4.991617953089399e-06,
      "loss": 0.898,
      "step": 499
    },
    {
      "epoch": 0.5167958656330749,
      "grad_norm": 0.01502886918005876,
      "learning_rate": 4.9748539534940825e-06,
      "loss": 0.5083,
      "step": 500
    },
    {
      "epoch": 0.517829457364341,
      "grad_norm": 0.006296119409349555,
      "learning_rate": 4.9580902365753765e-06,
      "loss": 0.6657,
      "step": 501
    },
    {
      "epoch": 0.5188630490956072,
      "grad_norm": 0.032112404088462575,
      "learning_rate": 4.941326990780819e-06,
      "loss": 0.6769,
      "step": 502
    },
    {
      "epoch": 0.5198966408268734,
      "grad_norm": 0.02043413344801753,
      "learning_rate": 4.9245644045526546e-06,
      "loss": 0.3595,
      "step": 503
    },
    {
      "epoch": 0.5209302325581395,
      "grad_norm": 0.004380865363179368,
      "learning_rate": 4.907802666325716e-06,
      "loss": 0.3217,
      "step": 504
    },
    {
      "epoch": 0.5219638242894057,
      "grad_norm": 0.011356798423078303,
      "learning_rate": 4.891041964525301e-06,
      "loss": 0.3235,
      "step": 505
    },
    {
      "epoch": 0.5229974160206718,
      "grad_norm": 0.030683275574193144,
      "learning_rate": 4.874282487565053e-06,
      "loss": 0.3476,
      "step": 506
    },
    {
      "epoch": 0.524031007751938,
      "grad_norm": 0.010006604437278915,
      "learning_rate": 4.857524423844855e-06,
      "loss": 0.7429,
      "step": 507
    },
    {
      "epoch": 0.5250645994832042,
      "grad_norm": 0.011586620597804333,
      "learning_rate": 4.840767961748697e-06,
      "loss": 0.3969,
      "step": 508
    },
    {
      "epoch": 0.5260981912144703,
      "grad_norm": 0.007542134626827019,
      "learning_rate": 4.824013289642563e-06,
      "loss": 0.7813,
      "step": 509
    },
    {
      "epoch": 0.5271317829457365,
      "grad_norm": 0.014910488059212225,
      "learning_rate": 4.807260595872322e-06,
      "loss": 0.7838,
      "step": 510
    },
    {
      "epoch": 0.5281653746770025,
      "grad_norm": 0.00796348295042139,
      "learning_rate": 4.790510068761596e-06,
      "loss": 0.9715,
      "step": 511
    },
    {
      "epoch": 0.5291989664082687,
      "grad_norm": 0.014932917109666943,
      "learning_rate": 4.773761896609658e-06,
      "loss": 1.3634,
      "step": 512
    },
    {
      "epoch": 0.5302325581395348,
      "grad_norm": 0.023989977869525256,
      "learning_rate": 4.757016267689302e-06,
      "loss": 0.3132,
      "step": 513
    },
    {
      "epoch": 0.531266149870801,
      "grad_norm": 0.008249050072984146,
      "learning_rate": 4.740273370244734e-06,
      "loss": 0.6358,
      "step": 514
    },
    {
      "epoch": 0.5322997416020672,
      "grad_norm": 0.012048939285676477,
      "learning_rate": 4.723533392489457e-06,
      "loss": 0.9938,
      "step": 515
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.004222184326040323,
      "learning_rate": 4.706796522604152e-06,
      "loss": 0.5341,
      "step": 516
    },
    {
      "epoch": 0.5343669250645995,
      "grad_norm": 0.03735462608454422,
      "learning_rate": 4.690062948734558e-06,
      "loss": 0.6675,
      "step": 517
    },
    {
      "epoch": 0.5354005167958656,
      "grad_norm": 0.01664023840985042,
      "learning_rate": 4.673332858989371e-06,
      "loss": 0.8232,
      "step": 518
    },
    {
      "epoch": 0.5364341085271318,
      "grad_norm": 0.0038102848840878153,
      "learning_rate": 4.656606441438114e-06,
      "loss": 0.5585,
      "step": 519
    },
    {
      "epoch": 0.537467700258398,
      "grad_norm": 0.036712450220883174,
      "learning_rate": 4.639883884109029e-06,
      "loss": 0.4638,
      "step": 520
    },
    {
      "epoch": 0.5385012919896641,
      "grad_norm": 0.0035144800477870572,
      "learning_rate": 4.623165374986971e-06,
      "loss": 0.8568,
      "step": 521
    },
    {
      "epoch": 0.5395348837209303,
      "grad_norm": 0.008326606980188417,
      "learning_rate": 4.606451102011278e-06,
      "loss": 0.6307,
      "step": 522
    },
    {
      "epoch": 0.5405684754521963,
      "grad_norm": 0.033557347806890216,
      "learning_rate": 4.589741253073673e-06,
      "loss": 1.2345,
      "step": 523
    },
    {
      "epoch": 0.5416020671834625,
      "grad_norm": 0.0060354792209766535,
      "learning_rate": 4.573036016016149e-06,
      "loss": 0.4011,
      "step": 524
    },
    {
      "epoch": 0.5426356589147286,
      "grad_norm": 0.0162141853124205,
      "learning_rate": 4.556335578628849e-06,
      "loss": 0.8252,
      "step": 525
    },
    {
      "epoch": 0.5436692506459948,
      "grad_norm": 0.06638912008024864,
      "learning_rate": 4.539640128647965e-06,
      "loss": 0.8821,
      "step": 526
    },
    {
      "epoch": 0.544702842377261,
      "grad_norm": 0.006460414629417034,
      "learning_rate": 4.522949853753624e-06,
      "loss": 0.7099,
      "step": 527
    },
    {
      "epoch": 0.5457364341085271,
      "grad_norm": 0.043775820398270514,
      "learning_rate": 4.506264941567774e-06,
      "loss": 0.6159,
      "step": 528
    },
    {
      "epoch": 0.5467700258397933,
      "grad_norm": 0.011664996491912476,
      "learning_rate": 4.489585579652083e-06,
      "loss": 0.8875,
      "step": 529
    },
    {
      "epoch": 0.5478036175710594,
      "grad_norm": 0.0003791722764498701,
      "learning_rate": 4.472911955505825e-06,
      "loss": 0.8595,
      "step": 530
    },
    {
      "epoch": 0.5488372093023256,
      "grad_norm": 0.015182372099187835,
      "learning_rate": 4.456244256563769e-06,
      "loss": 0.6812,
      "step": 531
    },
    {
      "epoch": 0.5498708010335918,
      "grad_norm": 0.017727125314341157,
      "learning_rate": 4.439582670194086e-06,
      "loss": 0.9976,
      "step": 532
    },
    {
      "epoch": 0.5509043927648579,
      "grad_norm": 0.011460613171317498,
      "learning_rate": 4.422927383696224e-06,
      "loss": 0.7254,
      "step": 533
    },
    {
      "epoch": 0.5519379844961241,
      "grad_norm": 0.005849440914326224,
      "learning_rate": 4.406278584298813e-06,
      "loss": 0.8355,
      "step": 534
    },
    {
      "epoch": 0.5529715762273901,
      "grad_norm": 0.023247042984527202,
      "learning_rate": 4.389636459157567e-06,
      "loss": 0.4297,
      "step": 535
    },
    {
      "epoch": 0.5540051679586563,
      "grad_norm": 0.005454143327876756,
      "learning_rate": 4.373001195353159e-06,
      "loss": 0.6921,
      "step": 536
    },
    {
      "epoch": 0.5550387596899224,
      "grad_norm": 0.005594372709690803,
      "learning_rate": 4.356372979889146e-06,
      "loss": 0.2422,
      "step": 537
    },
    {
      "epoch": 0.5560723514211886,
      "grad_norm": 0.01593137786475084,
      "learning_rate": 4.339751999689839e-06,
      "loss": 0.9746,
      "step": 538
    },
    {
      "epoch": 0.5571059431524548,
      "grad_norm": 0.007422398650452836,
      "learning_rate": 4.323138441598219e-06,
      "loss": 0.8939,
      "step": 539
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 0.004200239267851968,
      "learning_rate": 4.306532492373836e-06,
      "loss": 0.5264,
      "step": 540
    },
    {
      "epoch": 0.5591731266149871,
      "grad_norm": 0.021557521985436548,
      "learning_rate": 4.2899343386907e-06,
      "loss": 0.9029,
      "step": 541
    },
    {
      "epoch": 0.5602067183462532,
      "grad_norm": 0.006716706291650855,
      "learning_rate": 4.273344167135191e-06,
      "loss": 0.4856,
      "step": 542
    },
    {
      "epoch": 0.5612403100775194,
      "grad_norm": 0.008552707286225408,
      "learning_rate": 4.25676216420396e-06,
      "loss": 0.7185,
      "step": 543
    },
    {
      "epoch": 0.5622739018087856,
      "grad_norm": 0.01921812924949712,
      "learning_rate": 4.240188516301829e-06,
      "loss": 0.5839,
      "step": 544
    },
    {
      "epoch": 0.5633074935400517,
      "grad_norm": 0.015669453548882124,
      "learning_rate": 4.223623409739695e-06,
      "loss": 0.8572,
      "step": 545
    },
    {
      "epoch": 0.5643410852713179,
      "grad_norm": 0.03867810779533852,
      "learning_rate": 4.207067030732449e-06,
      "loss": 0.7622,
      "step": 546
    },
    {
      "epoch": 0.5653746770025839,
      "grad_norm": 0.008112308284906434,
      "learning_rate": 4.190519565396859e-06,
      "loss": 0.388,
      "step": 547
    },
    {
      "epoch": 0.5664082687338501,
      "grad_norm": 0.004188420335590479,
      "learning_rate": 4.173981199749498e-06,
      "loss": 0.7177,
      "step": 548
    },
    {
      "epoch": 0.5674418604651162,
      "grad_norm": 0.021789370266695442,
      "learning_rate": 4.157452119704648e-06,
      "loss": 0.8751,
      "step": 549
    },
    {
      "epoch": 0.5684754521963824,
      "grad_norm": 0.004877653173043439,
      "learning_rate": 4.140932511072201e-06,
      "loss": 0.7656,
      "step": 550
    },
    {
      "epoch": 0.5695090439276486,
      "grad_norm": 0.01017081055383884,
      "learning_rate": 4.124422559555584e-06,
      "loss": 0.457,
      "step": 551
    },
    {
      "epoch": 0.5705426356589147,
      "grad_norm": 0.011826790321771797,
      "learning_rate": 4.10792245074966e-06,
      "loss": 0.5551,
      "step": 552
    },
    {
      "epoch": 0.5715762273901809,
      "grad_norm": 0.0059904650287064595,
      "learning_rate": 4.091432370138646e-06,
      "loss": 0.7705,
      "step": 553
    },
    {
      "epoch": 0.572609819121447,
      "grad_norm": 0.030203938617580396,
      "learning_rate": 4.0749525030940335e-06,
      "loss": 1.0246,
      "step": 554
    },
    {
      "epoch": 0.5736434108527132,
      "grad_norm": 0.005139048409172766,
      "learning_rate": 4.058483034872493e-06,
      "loss": 0.4821,
      "step": 555
    },
    {
      "epoch": 0.5746770025839794,
      "grad_norm": 0.01310923788057172,
      "learning_rate": 4.042024150613798e-06,
      "loss": 0.3482,
      "step": 556
    },
    {
      "epoch": 0.5757105943152455,
      "grad_norm": 0.07863082085142363,
      "learning_rate": 4.025576035338752e-06,
      "loss": 0.4783,
      "step": 557
    },
    {
      "epoch": 0.5767441860465117,
      "grad_norm": 0.004741918037038147,
      "learning_rate": 4.009138873947089e-06,
      "loss": 0.6919,
      "step": 558
    },
    {
      "epoch": 0.5777777777777777,
      "grad_norm": 0.012143729109800166,
      "learning_rate": 3.992712851215411e-06,
      "loss": 0.7686,
      "step": 559
    },
    {
      "epoch": 0.5788113695090439,
      "grad_norm": 0.03592801259396655,
      "learning_rate": 3.976298151795107e-06,
      "loss": 0.6532,
      "step": 560
    },
    {
      "epoch": 0.57984496124031,
      "grad_norm": 0.009757034551942033,
      "learning_rate": 3.959894960210275e-06,
      "loss": 0.785,
      "step": 561
    },
    {
      "epoch": 0.5808785529715762,
      "grad_norm": 0.03596584404567946,
      "learning_rate": 3.9435034608556505e-06,
      "loss": 0.7744,
      "step": 562
    },
    {
      "epoch": 0.5819121447028424,
      "grad_norm": 0.017598251437126042,
      "learning_rate": 3.9271238379945285e-06,
      "loss": 0.424,
      "step": 563
    },
    {
      "epoch": 0.5829457364341085,
      "grad_norm": 0.01060207649183564,
      "learning_rate": 3.9107562757566975e-06,
      "loss": 0.7895,
      "step": 564
    },
    {
      "epoch": 0.5839793281653747,
      "grad_norm": 0.06101927269393801,
      "learning_rate": 3.8944009581363696e-06,
      "loss": 0.6192,
      "step": 565
    },
    {
      "epoch": 0.5850129198966408,
      "grad_norm": 0.009172801737671562,
      "learning_rate": 3.87805806899011e-06,
      "loss": 0.5921,
      "step": 566
    },
    {
      "epoch": 0.586046511627907,
      "grad_norm": 0.011902804157195144,
      "learning_rate": 3.861727792034762e-06,
      "loss": 0.4139,
      "step": 567
    },
    {
      "epoch": 0.5870801033591732,
      "grad_norm": 0.004058765541932425,
      "learning_rate": 3.8454103108454075e-06,
      "loss": 0.5563,
      "step": 568
    },
    {
      "epoch": 0.5881136950904393,
      "grad_norm": 0.02986565157313992,
      "learning_rate": 3.82910580885327e-06,
      "loss": 0.7086,
      "step": 569
    },
    {
      "epoch": 0.5891472868217055,
      "grad_norm": 0.007137284063962577,
      "learning_rate": 3.8128144693436743e-06,
      "loss": 0.9716,
      "step": 570
    },
    {
      "epoch": 0.5901808785529715,
      "grad_norm": 0.011553573838236431,
      "learning_rate": 3.7965364754539845e-06,
      "loss": 0.7065,
      "step": 571
    },
    {
      "epoch": 0.5912144702842377,
      "grad_norm": 0.0017311602303327772,
      "learning_rate": 3.7802720101715355e-06,
      "loss": 0.7369,
      "step": 572
    },
    {
      "epoch": 0.5922480620155038,
      "grad_norm": 0.00819632042682616,
      "learning_rate": 3.764021256331587e-06,
      "loss": 0.7316,
      "step": 573
    },
    {
      "epoch": 0.59328165374677,
      "grad_norm": 0.009657762684974472,
      "learning_rate": 3.747784396615258e-06,
      "loss": 1.0459,
      "step": 574
    },
    {
      "epoch": 0.5943152454780362,
      "grad_norm": 0.006097290291276007,
      "learning_rate": 3.731561613547481e-06,
      "loss": 0.956,
      "step": 575
    },
    {
      "epoch": 0.5953488372093023,
      "grad_norm": 0.007521913040628908,
      "learning_rate": 3.7153530894949476e-06,
      "loss": 0.4066,
      "step": 576
    },
    {
      "epoch": 0.5963824289405685,
      "grad_norm": 0.025618017899116455,
      "learning_rate": 3.699159006664056e-06,
      "loss": 0.8928,
      "step": 577
    },
    {
      "epoch": 0.5974160206718346,
      "grad_norm": 0.007243218437296844,
      "learning_rate": 3.682979547098867e-06,
      "loss": 0.4635,
      "step": 578
    },
    {
      "epoch": 0.5984496124031008,
      "grad_norm": 0.01761379354738488,
      "learning_rate": 3.6668148926790557e-06,
      "loss": 0.7957,
      "step": 579
    },
    {
      "epoch": 0.599483204134367,
      "grad_norm": 0.009021126401123257,
      "learning_rate": 3.6506652251178663e-06,
      "loss": 0.9326,
      "step": 580
    },
    {
      "epoch": 0.6005167958656331,
      "grad_norm": 0.007496113642184525,
      "learning_rate": 3.6345307259600657e-06,
      "loss": 0.8335,
      "step": 581
    },
    {
      "epoch": 0.6015503875968993,
      "grad_norm": 0.008257307464232722,
      "learning_rate": 3.618411576579916e-06,
      "loss": 0.2913,
      "step": 582
    },
    {
      "epoch": 0.6025839793281653,
      "grad_norm": 0.0365491136128324,
      "learning_rate": 3.602307958179117e-06,
      "loss": 1.0378,
      "step": 583
    },
    {
      "epoch": 0.6036175710594315,
      "grad_norm": 0.018348033168397036,
      "learning_rate": 3.586220051784783e-06,
      "loss": 0.6316,
      "step": 584
    },
    {
      "epoch": 0.6046511627906976,
      "grad_norm": 0.0140358281387387,
      "learning_rate": 3.5701480382474047e-06,
      "loss": 0.7818,
      "step": 585
    },
    {
      "epoch": 0.6056847545219638,
      "grad_norm": 0.02743691678567726,
      "learning_rate": 3.554092098238811e-06,
      "loss": 0.4791,
      "step": 586
    },
    {
      "epoch": 0.60671834625323,
      "grad_norm": 0.01123836382582932,
      "learning_rate": 3.538052412250147e-06,
      "loss": 0.5848,
      "step": 587
    },
    {
      "epoch": 0.6077519379844961,
      "grad_norm": 0.012555065172510054,
      "learning_rate": 3.5220291605898354e-06,
      "loss": 0.733,
      "step": 588
    },
    {
      "epoch": 0.6087855297157623,
      "grad_norm": 0.01388812622450263,
      "learning_rate": 3.5060225233815554e-06,
      "loss": 0.7246,
      "step": 589
    },
    {
      "epoch": 0.6098191214470284,
      "grad_norm": 0.0036538702492984946,
      "learning_rate": 3.4900326805622185e-06,
      "loss": 0.3745,
      "step": 590
    },
    {
      "epoch": 0.6108527131782946,
      "grad_norm": 0.00783807778721384,
      "learning_rate": 3.474059811879944e-06,
      "loss": 0.8907,
      "step": 591
    },
    {
      "epoch": 0.6118863049095608,
      "grad_norm": 0.004288727311295845,
      "learning_rate": 3.458104096892031e-06,
      "loss": 0.638,
      "step": 592
    },
    {
      "epoch": 0.6129198966408269,
      "grad_norm": 0.038304730249219786,
      "learning_rate": 3.4421657149629593e-06,
      "loss": 0.7205,
      "step": 593
    },
    {
      "epoch": 0.6139534883720931,
      "grad_norm": 0.011224288038476355,
      "learning_rate": 3.4262448452623514e-06,
      "loss": 0.7502,
      "step": 594
    },
    {
      "epoch": 0.6149870801033591,
      "grad_norm": 0.04221857613062909,
      "learning_rate": 3.410341666762971e-06,
      "loss": 0.6902,
      "step": 595
    },
    {
      "epoch": 0.6160206718346253,
      "grad_norm": 0.01679675570714572,
      "learning_rate": 3.3944563582387084e-06,
      "loss": 0.7589,
      "step": 596
    },
    {
      "epoch": 0.6170542635658914,
      "grad_norm": 0.005635797489238129,
      "learning_rate": 3.3785890982625702e-06,
      "loss": 0.8768,
      "step": 597
    },
    {
      "epoch": 0.6180878552971576,
      "grad_norm": 0.02859639382445036,
      "learning_rate": 3.3627400652046736e-06,
      "loss": 0.8645,
      "step": 598
    },
    {
      "epoch": 0.6191214470284238,
      "grad_norm": 0.014196906493538393,
      "learning_rate": 3.3469094372302374e-06,
      "loss": 0.5522,
      "step": 599
    },
    {
      "epoch": 0.6201550387596899,
      "grad_norm": 0.007364475506423835,
      "learning_rate": 3.331097392297582e-06,
      "loss": 0.6242,
      "step": 600
    },
    {
      "epoch": 0.6211886304909561,
      "grad_norm": 0.09485580654879949,
      "learning_rate": 3.31530410815613e-06,
      "loss": 0.6175,
      "step": 601
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.014896868627572802,
      "learning_rate": 3.2995297623444067e-06,
      "loss": 0.9151,
      "step": 602
    },
    {
      "epoch": 0.6232558139534884,
      "grad_norm": 0.039062965631918,
      "learning_rate": 3.283774532188039e-06,
      "loss": 0.6253,
      "step": 603
    },
    {
      "epoch": 0.6242894056847546,
      "grad_norm": 0.0060396438339947135,
      "learning_rate": 3.268038594797777e-06,
      "loss": 0.6744,
      "step": 604
    },
    {
      "epoch": 0.6253229974160207,
      "grad_norm": 0.006502495730153714,
      "learning_rate": 3.2523221270674845e-06,
      "loss": 0.4722,
      "step": 605
    },
    {
      "epoch": 0.6263565891472869,
      "grad_norm": 0.01651900100319116,
      "learning_rate": 3.2366253056721607e-06,
      "loss": 0.5761,
      "step": 606
    },
    {
      "epoch": 0.6273901808785529,
      "grad_norm": 0.09051548320018694,
      "learning_rate": 3.220948307065959e-06,
      "loss": 0.8134,
      "step": 607
    },
    {
      "epoch": 0.6284237726098191,
      "grad_norm": 0.003464287762958902,
      "learning_rate": 3.2052913074801876e-06,
      "loss": 0.7203,
      "step": 608
    },
    {
      "epoch": 0.6294573643410852,
      "grad_norm": 0.008924911210364556,
      "learning_rate": 3.1896544829213444e-06,
      "loss": 0.7852,
      "step": 609
    },
    {
      "epoch": 0.6304909560723514,
      "grad_norm": 0.006312543730714082,
      "learning_rate": 3.17403800916913e-06,
      "loss": 0.2739,
      "step": 610
    },
    {
      "epoch": 0.6315245478036176,
      "grad_norm": 0.0009341299621644434,
      "learning_rate": 3.1584420617744737e-06,
      "loss": 0.7791,
      "step": 611
    },
    {
      "epoch": 0.6325581395348837,
      "grad_norm": 0.02011427119811984,
      "learning_rate": 3.142866816057559e-06,
      "loss": 0.3979,
      "step": 612
    },
    {
      "epoch": 0.6335917312661499,
      "grad_norm": 0.1978096609887796,
      "learning_rate": 3.1273124471058567e-06,
      "loss": 0.5073,
      "step": 613
    },
    {
      "epoch": 0.634625322997416,
      "grad_norm": 0.003926429138889296,
      "learning_rate": 3.1117791297721468e-06,
      "loss": 0.6384,
      "step": 614
    },
    {
      "epoch": 0.6356589147286822,
      "grad_norm": 0.015008664481936326,
      "learning_rate": 3.09626703867257e-06,
      "loss": 0.5018,
      "step": 615
    },
    {
      "epoch": 0.6366925064599483,
      "grad_norm": 0.00578869660282907,
      "learning_rate": 3.0807763481846455e-06,
      "loss": 0.679,
      "step": 616
    },
    {
      "epoch": 0.6377260981912145,
      "grad_norm": 0.008437368076284105,
      "learning_rate": 3.0653072324453226e-06,
      "loss": 0.8456,
      "step": 617
    },
    {
      "epoch": 0.6387596899224807,
      "grad_norm": 0.002752472101030668,
      "learning_rate": 3.049859865349023e-06,
      "loss": 0.5325,
      "step": 618
    },
    {
      "epoch": 0.6397932816537467,
      "grad_norm": 0.021911736100867663,
      "learning_rate": 3.0344344205456807e-06,
      "loss": 0.5609,
      "step": 619
    },
    {
      "epoch": 0.6408268733850129,
      "grad_norm": 0.01816995013648897,
      "learning_rate": 3.0190310714387914e-06,
      "loss": 0.7081,
      "step": 620
    },
    {
      "epoch": 0.641860465116279,
      "grad_norm": 0.0020865529929973204,
      "learning_rate": 3.00364999118347e-06,
      "loss": 0.4725,
      "step": 621
    },
    {
      "epoch": 0.6428940568475452,
      "grad_norm": 0.010085441958720067,
      "learning_rate": 2.988291352684491e-06,
      "loss": 0.3977,
      "step": 622
    },
    {
      "epoch": 0.6439276485788114,
      "grad_norm": 0.010321412887300246,
      "learning_rate": 2.9729553285943587e-06,
      "loss": 0.8165,
      "step": 623
    },
    {
      "epoch": 0.6449612403100775,
      "grad_norm": 0.010355294191339019,
      "learning_rate": 2.9576420913113568e-06,
      "loss": 0.7714,
      "step": 624
    },
    {
      "epoch": 0.6459948320413437,
      "grad_norm": 0.020419592881328177,
      "learning_rate": 2.9423518129776095e-06,
      "loss": 1.0293,
      "step": 625
    },
    {
      "epoch": 0.6470284237726098,
      "grad_norm": 0.002520830347441674,
      "learning_rate": 2.927084665477162e-06,
      "loss": 0.4947,
      "step": 626
    },
    {
      "epoch": 0.648062015503876,
      "grad_norm": 0.006901866115399646,
      "learning_rate": 2.9118408204340244e-06,
      "loss": 0.5765,
      "step": 627
    },
    {
      "epoch": 0.6490956072351421,
      "grad_norm": 0.01018467808404735,
      "learning_rate": 2.8966204492102606e-06,
      "loss": 0.7652,
      "step": 628
    },
    {
      "epoch": 0.6501291989664083,
      "grad_norm": 0.02703357936715574,
      "learning_rate": 2.8814237229040556e-06,
      "loss": 0.7292,
      "step": 629
    },
    {
      "epoch": 0.6511627906976745,
      "grad_norm": 0.0016202796325287287,
      "learning_rate": 2.866250812347795e-06,
      "loss": 0.4138,
      "step": 630
    },
    {
      "epoch": 0.6521963824289405,
      "grad_norm": 0.0015464166082684323,
      "learning_rate": 2.8511018881061347e-06,
      "loss": 0.7175,
      "step": 631
    },
    {
      "epoch": 0.6532299741602067,
      "grad_norm": 0.0026867085170827433,
      "learning_rate": 2.8359771204741e-06,
      "loss": 0.6615,
      "step": 632
    },
    {
      "epoch": 0.6542635658914728,
      "grad_norm": 0.00955541417701338,
      "learning_rate": 2.8208766794751518e-06,
      "loss": 0.661,
      "step": 633
    },
    {
      "epoch": 0.655297157622739,
      "grad_norm": 0.005210464167177706,
      "learning_rate": 2.8058007348593003e-06,
      "loss": 0.4978,
      "step": 634
    },
    {
      "epoch": 0.6563307493540051,
      "grad_norm": 0.0031871170878113044,
      "learning_rate": 2.7907494561011693e-06,
      "loss": 0.959,
      "step": 635
    },
    {
      "epoch": 0.6573643410852713,
      "grad_norm": 0.003046856237404386,
      "learning_rate": 2.775723012398107e-06,
      "loss": 0.687,
      "step": 636
    },
    {
      "epoch": 0.6583979328165375,
      "grad_norm": 0.025585480092641298,
      "learning_rate": 2.760721572668284e-06,
      "loss": 0.3055,
      "step": 637
    },
    {
      "epoch": 0.6594315245478036,
      "grad_norm": 0.01156144100781046,
      "learning_rate": 2.745745305548793e-06,
      "loss": 0.3886,
      "step": 638
    },
    {
      "epoch": 0.6604651162790698,
      "grad_norm": 0.029727193309962312,
      "learning_rate": 2.730794379393742e-06,
      "loss": 0.324,
      "step": 639
    },
    {
      "epoch": 0.661498708010336,
      "grad_norm": 0.016920974338993942,
      "learning_rate": 2.7158689622723816e-06,
      "loss": 0.3964,
      "step": 640
    },
    {
      "epoch": 0.6625322997416021,
      "grad_norm": 0.014134717424229625,
      "learning_rate": 2.7009692219672025e-06,
      "loss": 0.8558,
      "step": 641
    },
    {
      "epoch": 0.6635658914728683,
      "grad_norm": 0.02231126937931001,
      "learning_rate": 2.6860953259720473e-06,
      "loss": 0.441,
      "step": 642
    },
    {
      "epoch": 0.6645994832041343,
      "grad_norm": 0.01205752837800655,
      "learning_rate": 2.67124744149024e-06,
      "loss": 0.7614,
      "step": 643
    },
    {
      "epoch": 0.6656330749354005,
      "grad_norm": 0.02218156709929171,
      "learning_rate": 2.6564257354326915e-06,
      "loss": 0.769,
      "step": 644
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.014893167370598162,
      "learning_rate": 2.641630374416036e-06,
      "loss": 0.5223,
      "step": 645
    },
    {
      "epoch": 0.6677002583979328,
      "grad_norm": 0.01403455637021974,
      "learning_rate": 2.6268615247607533e-06,
      "loss": 0.6339,
      "step": 646
    },
    {
      "epoch": 0.668733850129199,
      "grad_norm": 0.015566420897117971,
      "learning_rate": 2.612119352489292e-06,
      "loss": 0.5814,
      "step": 647
    },
    {
      "epoch": 0.6697674418604651,
      "grad_norm": 0.002122407530651032,
      "learning_rate": 2.597404023324217e-06,
      "loss": 0.6307,
      "step": 648
    },
    {
      "epoch": 0.6708010335917313,
      "grad_norm": 0.006599386161938344,
      "learning_rate": 2.582715702686337e-06,
      "loss": 0.5231,
      "step": 649
    },
    {
      "epoch": 0.6718346253229974,
      "grad_norm": 0.016354568592818277,
      "learning_rate": 2.5680545556928438e-06,
      "loss": 0.6152,
      "step": 650
    },
    {
      "epoch": 0.6728682170542636,
      "grad_norm": 0.003625740048276167,
      "learning_rate": 2.5534207471554644e-06,
      "loss": 0.6031,
      "step": 651
    },
    {
      "epoch": 0.6739018087855297,
      "grad_norm": 0.008986913575988117,
      "learning_rate": 2.5388144415786026e-06,
      "loss": 0.6125,
      "step": 652
    },
    {
      "epoch": 0.6749354005167959,
      "grad_norm": 0.010723552992309864,
      "learning_rate": 2.5242358031574853e-06,
      "loss": 0.7357,
      "step": 653
    },
    {
      "epoch": 0.6759689922480621,
      "grad_norm": 0.06098096371639942,
      "learning_rate": 2.509684995776329e-06,
      "loss": 0.819,
      "step": 654
    },
    {
      "epoch": 0.6770025839793282,
      "grad_norm": 0.0010713797947024342,
      "learning_rate": 2.4951621830064887e-06,
      "loss": 0.855,
      "step": 655
    },
    {
      "epoch": 0.6780361757105943,
      "grad_norm": 0.027456563946239576,
      "learning_rate": 2.480667528104617e-06,
      "loss": 0.816,
      "step": 656
    },
    {
      "epoch": 0.6790697674418604,
      "grad_norm": 0.008477741598219566,
      "learning_rate": 2.4662011940108383e-06,
      "loss": 0.4875,
      "step": 657
    },
    {
      "epoch": 0.6801033591731266,
      "grad_norm": 0.004334244281750061,
      "learning_rate": 2.4517633433469062e-06,
      "loss": 0.3076,
      "step": 658
    },
    {
      "epoch": 0.6811369509043927,
      "grad_norm": 0.009911692985877999,
      "learning_rate": 2.437354138414385e-06,
      "loss": 0.9015,
      "step": 659
    },
    {
      "epoch": 0.6821705426356589,
      "grad_norm": 0.0018047293874378404,
      "learning_rate": 2.4229737411928222e-06,
      "loss": 0.6752,
      "step": 660
    },
    {
      "epoch": 0.6832041343669251,
      "grad_norm": 0.008895823931468157,
      "learning_rate": 2.40862231333792e-06,
      "loss": 0.8797,
      "step": 661
    },
    {
      "epoch": 0.6842377260981912,
      "grad_norm": 0.0014523810492770642,
      "learning_rate": 2.3943000161797304e-06,
      "loss": 0.6763,
      "step": 662
    },
    {
      "epoch": 0.6852713178294574,
      "grad_norm": 0.03433477657377308,
      "learning_rate": 2.3800070107208355e-06,
      "loss": 0.8591,
      "step": 663
    },
    {
      "epoch": 0.6863049095607235,
      "grad_norm": 0.0009164810473600216,
      "learning_rate": 2.365743457634533e-06,
      "loss": 0.5948,
      "step": 664
    },
    {
      "epoch": 0.6873385012919897,
      "grad_norm": 0.010825711835904145,
      "learning_rate": 2.351509517263041e-06,
      "loss": 0.5315,
      "step": 665
    },
    {
      "epoch": 0.6883720930232559,
      "grad_norm": 0.010633847164323947,
      "learning_rate": 2.3373053496156865e-06,
      "loss": 0.4645,
      "step": 666
    },
    {
      "epoch": 0.689405684754522,
      "grad_norm": 0.020471877228394286,
      "learning_rate": 2.3231311143671077e-06,
      "loss": 0.3931,
      "step": 667
    },
    {
      "epoch": 0.6904392764857881,
      "grad_norm": 0.03370924633666957,
      "learning_rate": 2.308986970855466e-06,
      "loss": 0.6611,
      "step": 668
    },
    {
      "epoch": 0.6914728682170542,
      "grad_norm": 0.007618510539879816,
      "learning_rate": 2.2948730780806407e-06,
      "loss": 0.8877,
      "step": 669
    },
    {
      "epoch": 0.6925064599483204,
      "grad_norm": 0.02653144096649351,
      "learning_rate": 2.2807895947024643e-06,
      "loss": 0.7268,
      "step": 670
    },
    {
      "epoch": 0.6935400516795865,
      "grad_norm": 0.032392931283786675,
      "learning_rate": 2.2667366790389152e-06,
      "loss": 0.9414,
      "step": 671
    },
    {
      "epoch": 0.6945736434108527,
      "grad_norm": 0.0048701560137510876,
      "learning_rate": 2.2527144890643465e-06,
      "loss": 0.5213,
      "step": 672
    },
    {
      "epoch": 0.6956072351421189,
      "grad_norm": 0.01017989295659803,
      "learning_rate": 2.2387231824077188e-06,
      "loss": 0.7436,
      "step": 673
    },
    {
      "epoch": 0.696640826873385,
      "grad_norm": 0.011454168086977502,
      "learning_rate": 2.2247629163508207e-06,
      "loss": 0.511,
      "step": 674
    },
    {
      "epoch": 0.6976744186046512,
      "grad_norm": 0.05133513418648132,
      "learning_rate": 2.2108338478264934e-06,
      "loss": 0.5181,
      "step": 675
    },
    {
      "epoch": 0.6987080103359173,
      "grad_norm": 0.006370243089094228,
      "learning_rate": 2.196936133416882e-06,
      "loss": 0.2995,
      "step": 676
    },
    {
      "epoch": 0.6997416020671835,
      "grad_norm": 0.09725064571569494,
      "learning_rate": 2.1830699293516677e-06,
      "loss": 0.8599,
      "step": 677
    },
    {
      "epoch": 0.7007751937984497,
      "grad_norm": 0.0215394288697926,
      "learning_rate": 2.1692353915063047e-06,
      "loss": 0.3789,
      "step": 678
    },
    {
      "epoch": 0.7018087855297158,
      "grad_norm": 0.008436642259748045,
      "learning_rate": 2.155432675400283e-06,
      "loss": 0.7591,
      "step": 679
    },
    {
      "epoch": 0.7028423772609819,
      "grad_norm": 0.03525064931094375,
      "learning_rate": 2.141661936195364e-06,
      "loss": 0.8818,
      "step": 680
    },
    {
      "epoch": 0.703875968992248,
      "grad_norm": 0.008044780804442146,
      "learning_rate": 2.1279233286938503e-06,
      "loss": 1.1236,
      "step": 681
    },
    {
      "epoch": 0.7049095607235142,
      "grad_norm": 0.009770617014590978,
      "learning_rate": 2.1142170073368396e-06,
      "loss": 1.1511,
      "step": 682
    },
    {
      "epoch": 0.7059431524547803,
      "grad_norm": 0.006708023984698728,
      "learning_rate": 2.100543126202481e-06,
      "loss": 0.3546,
      "step": 683
    },
    {
      "epoch": 0.7069767441860465,
      "grad_norm": 0.0023253281385663747,
      "learning_rate": 2.0869018390042588e-06,
      "loss": 0.9832,
      "step": 684
    },
    {
      "epoch": 0.7080103359173127,
      "grad_norm": 0.011519621039001124,
      "learning_rate": 2.0732932990892528e-06,
      "loss": 1.2085,
      "step": 685
    },
    {
      "epoch": 0.7090439276485788,
      "grad_norm": 0.018736153267084898,
      "learning_rate": 2.059717659436415e-06,
      "loss": 0.5925,
      "step": 686
    },
    {
      "epoch": 0.710077519379845,
      "grad_norm": 0.03453876592418535,
      "learning_rate": 2.0461750726548558e-06,
      "loss": 0.5257,
      "step": 687
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.010838200213403048,
      "learning_rate": 2.032665690982126e-06,
      "loss": 0.6674,
      "step": 688
    },
    {
      "epoch": 0.7121447028423773,
      "grad_norm": 0.033157981270379465,
      "learning_rate": 2.0191896662825012e-06,
      "loss": 0.8682,
      "step": 689
    },
    {
      "epoch": 0.7131782945736435,
      "grad_norm": 0.006693930147784915,
      "learning_rate": 2.0057471500452822e-06,
      "loss": 0.7901,
      "step": 690
    },
    {
      "epoch": 0.7142118863049096,
      "grad_norm": 0.007992638103915274,
      "learning_rate": 1.9923382933830836e-06,
      "loss": 0.5136,
      "step": 691
    },
    {
      "epoch": 0.7152454780361757,
      "grad_norm": 0.008402576233199083,
      "learning_rate": 1.9789632470301423e-06,
      "loss": 0.597,
      "step": 692
    },
    {
      "epoch": 0.7162790697674418,
      "grad_norm": 0.01295557302148158,
      "learning_rate": 1.9656221613406217e-06,
      "loss": 0.6499,
      "step": 693
    },
    {
      "epoch": 0.717312661498708,
      "grad_norm": 0.0109815325594314,
      "learning_rate": 1.952315186286915e-06,
      "loss": 1.1922,
      "step": 694
    },
    {
      "epoch": 0.7183462532299741,
      "grad_norm": 0.0019463878853608347,
      "learning_rate": 1.9390424714579683e-06,
      "loss": 1.0341,
      "step": 695
    },
    {
      "epoch": 0.7193798449612403,
      "grad_norm": 0.023635233380624232,
      "learning_rate": 1.925804166057596e-06,
      "loss": 0.6396,
      "step": 696
    },
    {
      "epoch": 0.7204134366925065,
      "grad_norm": 0.005120744370696843,
      "learning_rate": 1.9126004189027975e-06,
      "loss": 0.3227,
      "step": 697
    },
    {
      "epoch": 0.7214470284237726,
      "grad_norm": 0.027514689723732923,
      "learning_rate": 1.8994313784220942e-06,
      "loss": 1.0453,
      "step": 698
    },
    {
      "epoch": 0.7224806201550388,
      "grad_norm": 0.0027267227829363924,
      "learning_rate": 1.8862971926538553e-06,
      "loss": 0.5588,
      "step": 699
    },
    {
      "epoch": 0.7235142118863049,
      "grad_norm": 0.08718772843465097,
      "learning_rate": 1.8731980092446305e-06,
      "loss": 0.5745,
      "step": 700
    },
    {
      "epoch": 0.7245478036175711,
      "grad_norm": 0.007742481632428125,
      "learning_rate": 1.8601339754475007e-06,
      "loss": 0.4181,
      "step": 701
    },
    {
      "epoch": 0.7255813953488373,
      "grad_norm": 0.03726437592538067,
      "learning_rate": 1.8471052381204091e-06,
      "loss": 1.1237,
      "step": 702
    },
    {
      "epoch": 0.7266149870801034,
      "grad_norm": 0.011449708286381127,
      "learning_rate": 1.8341119437245231e-06,
      "loss": 0.6414,
      "step": 703
    },
    {
      "epoch": 0.7276485788113695,
      "grad_norm": 0.009305701834098934,
      "learning_rate": 1.8211542383225811e-06,
      "loss": 0.6433,
      "step": 704
    },
    {
      "epoch": 0.7286821705426356,
      "grad_norm": 0.010671546593769654,
      "learning_rate": 1.8082322675772478e-06,
      "loss": 1.1493,
      "step": 705
    },
    {
      "epoch": 0.7297157622739018,
      "grad_norm": 0.0018602280088677243,
      "learning_rate": 1.795346176749484e-06,
      "loss": 0.5037,
      "step": 706
    },
    {
      "epoch": 0.7307493540051679,
      "grad_norm": 0.0007885159717911003,
      "learning_rate": 1.7824961106969124e-06,
      "loss": 0.4834,
      "step": 707
    },
    {
      "epoch": 0.7317829457364341,
      "grad_norm": 0.011880864002012827,
      "learning_rate": 1.7696822138721798e-06,
      "loss": 0.7377,
      "step": 708
    },
    {
      "epoch": 0.7328165374677003,
      "grad_norm": 0.025620016208210236,
      "learning_rate": 1.756904630321347e-06,
      "loss": 0.5515,
      "step": 709
    },
    {
      "epoch": 0.7338501291989664,
      "grad_norm": 0.008994446868076742,
      "learning_rate": 1.7441635036822624e-06,
      "loss": 0.4303,
      "step": 710
    },
    {
      "epoch": 0.7348837209302326,
      "grad_norm": 0.009832900535250098,
      "learning_rate": 1.7314589771829426e-06,
      "loss": 0.4719,
      "step": 711
    },
    {
      "epoch": 0.7359173126614987,
      "grad_norm": 0.009848681222892773,
      "learning_rate": 1.718791193639973e-06,
      "loss": 0.6931,
      "step": 712
    },
    {
      "epoch": 0.7369509043927649,
      "grad_norm": 0.015526483057121305,
      "learning_rate": 1.706160295456898e-06,
      "loss": 0.8387,
      "step": 713
    },
    {
      "epoch": 0.737984496124031,
      "grad_norm": 0.019454557773862527,
      "learning_rate": 1.693566424622612e-06,
      "loss": 0.7906,
      "step": 714
    },
    {
      "epoch": 0.7390180878552972,
      "grad_norm": 0.01571287388883206,
      "learning_rate": 1.6810097227097782e-06,
      "loss": 0.8704,
      "step": 715
    },
    {
      "epoch": 0.7400516795865633,
      "grad_norm": 0.01679962154441663,
      "learning_rate": 1.668490330873223e-06,
      "loss": 0.8328,
      "step": 716
    },
    {
      "epoch": 0.7410852713178294,
      "grad_norm": 0.0003024686536669724,
      "learning_rate": 1.6560083898483598e-06,
      "loss": 0.5538,
      "step": 717
    },
    {
      "epoch": 0.7421188630490956,
      "grad_norm": 0.009524882717537417,
      "learning_rate": 1.6435640399496033e-06,
      "loss": 0.7881,
      "step": 718
    },
    {
      "epoch": 0.7431524547803617,
      "grad_norm": 0.01589038696180367,
      "learning_rate": 1.6311574210687865e-06,
      "loss": 0.6789,
      "step": 719
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 0.021604641208150203,
      "learning_rate": 1.618788672673598e-06,
      "loss": 0.8453,
      "step": 720
    },
    {
      "epoch": 0.7452196382428941,
      "grad_norm": 0.0175226673994578,
      "learning_rate": 1.6064579338060088e-06,
      "loss": 0.5789,
      "step": 721
    },
    {
      "epoch": 0.7462532299741602,
      "grad_norm": 0.020082884853953008,
      "learning_rate": 1.5941653430807052e-06,
      "loss": 0.6265,
      "step": 722
    },
    {
      "epoch": 0.7472868217054264,
      "grad_norm": 0.009837537932587495,
      "learning_rate": 1.5819110386835413e-06,
      "loss": 0.366,
      "step": 723
    },
    {
      "epoch": 0.7483204134366925,
      "grad_norm": 0.013333997810691178,
      "learning_rate": 1.5696951583699776e-06,
      "loss": 0.8718,
      "step": 724
    },
    {
      "epoch": 0.7493540051679587,
      "grad_norm": 0.002463295622274699,
      "learning_rate": 1.5575178394635315e-06,
      "loss": 0.6202,
      "step": 725
    },
    {
      "epoch": 0.7503875968992249,
      "grad_norm": 0.008162642435186026,
      "learning_rate": 1.545379218854241e-06,
      "loss": 1.1227,
      "step": 726
    },
    {
      "epoch": 0.751421188630491,
      "grad_norm": 0.026341878493428644,
      "learning_rate": 1.5332794329971157e-06,
      "loss": 1.0924,
      "step": 727
    },
    {
      "epoch": 0.7524547803617571,
      "grad_norm": 0.014709730816084543,
      "learning_rate": 1.5212186179106142e-06,
      "loss": 0.5094,
      "step": 728
    },
    {
      "epoch": 0.7534883720930232,
      "grad_norm": 0.012845597387523151,
      "learning_rate": 1.5091969091751073e-06,
      "loss": 0.6114,
      "step": 729
    },
    {
      "epoch": 0.7545219638242894,
      "grad_norm": 0.007796756582736304,
      "learning_rate": 1.4972144419313528e-06,
      "loss": 0.6696,
      "step": 730
    },
    {
      "epoch": 0.7555555555555555,
      "grad_norm": 0.015297657900005535,
      "learning_rate": 1.4852713508789835e-06,
      "loss": 0.7264,
      "step": 731
    },
    {
      "epoch": 0.7565891472868217,
      "grad_norm": 0.00930048666519971,
      "learning_rate": 1.4733677702749894e-06,
      "loss": 1.1346,
      "step": 732
    },
    {
      "epoch": 0.7576227390180879,
      "grad_norm": 0.006262966227623613,
      "learning_rate": 1.4615038339322025e-06,
      "loss": 0.5167,
      "step": 733
    },
    {
      "epoch": 0.758656330749354,
      "grad_norm": 0.03172037902662958,
      "learning_rate": 1.4496796752178032e-06,
      "loss": 1.0521,
      "step": 734
    },
    {
      "epoch": 0.7596899224806202,
      "grad_norm": 0.018184964848633134,
      "learning_rate": 1.4378954270518169e-06,
      "loss": 1.1405,
      "step": 735
    },
    {
      "epoch": 0.7607235142118863,
      "grad_norm": 0.02625596747373729,
      "learning_rate": 1.4261512219056118e-06,
      "loss": 0.8989,
      "step": 736
    },
    {
      "epoch": 0.7617571059431525,
      "grad_norm": 0.0065062795304784565,
      "learning_rate": 1.4144471918004255e-06,
      "loss": 0.3899,
      "step": 737
    },
    {
      "epoch": 0.7627906976744186,
      "grad_norm": 0.01927512689026163,
      "learning_rate": 1.402783468305864e-06,
      "loss": 0.8255,
      "step": 738
    },
    {
      "epoch": 0.7638242894056848,
      "grad_norm": 0.003184033803095864,
      "learning_rate": 1.391160182538437e-06,
      "loss": 1.3026,
      "step": 739
    },
    {
      "epoch": 0.7648578811369509,
      "grad_norm": 0.005059085836242208,
      "learning_rate": 1.3795774651600757e-06,
      "loss": 0.6628,
      "step": 740
    },
    {
      "epoch": 0.765891472868217,
      "grad_norm": 0.01702957285311037,
      "learning_rate": 1.3680354463766642e-06,
      "loss": 0.8205,
      "step": 741
    },
    {
      "epoch": 0.7669250645994832,
      "grad_norm": 0.012838351449086767,
      "learning_rate": 1.3565342559365808e-06,
      "loss": 0.4499,
      "step": 742
    },
    {
      "epoch": 0.7679586563307493,
      "grad_norm": 0.02076776695674655,
      "learning_rate": 1.3450740231292354e-06,
      "loss": 0.8441,
      "step": 743
    },
    {
      "epoch": 0.7689922480620155,
      "grad_norm": 0.02421010717274923,
      "learning_rate": 1.3336548767836144e-06,
      "loss": 0.805,
      "step": 744
    },
    {
      "epoch": 0.7700258397932817,
      "grad_norm": 0.014142641934300626,
      "learning_rate": 1.3222769452668382e-06,
      "loss": 0.4157,
      "step": 745
    },
    {
      "epoch": 0.7710594315245478,
      "grad_norm": 0.036844953658085815,
      "learning_rate": 1.3109403564827155e-06,
      "loss": 0.9491,
      "step": 746
    },
    {
      "epoch": 0.772093023255814,
      "grad_norm": 0.006128254461386621,
      "learning_rate": 1.2996452378703013e-06,
      "loss": 0.6248,
      "step": 747
    },
    {
      "epoch": 0.7731266149870801,
      "grad_norm": 0.0669139602435429,
      "learning_rate": 1.2883917164024722e-06,
      "loss": 0.6374,
      "step": 748
    },
    {
      "epoch": 0.7741602067183463,
      "grad_norm": 0.007946312171954946,
      "learning_rate": 1.2771799185844913e-06,
      "loss": 1.066,
      "step": 749
    },
    {
      "epoch": 0.7751937984496124,
      "grad_norm": 0.02370322871693566,
      "learning_rate": 1.266009970452593e-06,
      "loss": 0.6947,
      "step": 750
    },
    {
      "epoch": 0.7762273901808786,
      "grad_norm": 0.0013412132628990694,
      "learning_rate": 1.2548819975725624e-06,
      "loss": 1.2011,
      "step": 751
    },
    {
      "epoch": 0.7772609819121447,
      "grad_norm": 0.016836999205720907,
      "learning_rate": 1.2437961250383207e-06,
      "loss": 0.6446,
      "step": 752
    },
    {
      "epoch": 0.7782945736434108,
      "grad_norm": 0.004910590370901763,
      "learning_rate": 1.2327524774705268e-06,
      "loss": 0.6683,
      "step": 753
    },
    {
      "epoch": 0.779328165374677,
      "grad_norm": 0.001674177916558112,
      "learning_rate": 1.221751179015172e-06,
      "loss": 0.7239,
      "step": 754
    },
    {
      "epoch": 0.7803617571059431,
      "grad_norm": 0.0314645317625172,
      "learning_rate": 1.2107923533421795e-06,
      "loss": 0.5538,
      "step": 755
    },
    {
      "epoch": 0.7813953488372093,
      "grad_norm": 0.007931688504401183,
      "learning_rate": 1.1998761236440248e-06,
      "loss": 0.7283,
      "step": 756
    },
    {
      "epoch": 0.7824289405684755,
      "grad_norm": 0.0028398690937157254,
      "learning_rate": 1.1890026126343446e-06,
      "loss": 0.6823,
      "step": 757
    },
    {
      "epoch": 0.7834625322997416,
      "grad_norm": 0.008586854102010818,
      "learning_rate": 1.1781719425465538e-06,
      "loss": 0.6256,
      "step": 758
    },
    {
      "epoch": 0.7844961240310078,
      "grad_norm": 0.009731444759134488,
      "learning_rate": 1.1673842351324816e-06,
      "loss": 0.405,
      "step": 759
    },
    {
      "epoch": 0.7855297157622739,
      "grad_norm": 0.04277079111530455,
      "learning_rate": 1.1566396116609907e-06,
      "loss": 0.8424,
      "step": 760
    },
    {
      "epoch": 0.7865633074935401,
      "grad_norm": 0.006618115874737083,
      "learning_rate": 1.1459381929166251e-06,
      "loss": 0.9606,
      "step": 761
    },
    {
      "epoch": 0.7875968992248062,
      "grad_norm": 0.00441882417329326,
      "learning_rate": 1.1352800991982467e-06,
      "loss": 0.7962,
      "step": 762
    },
    {
      "epoch": 0.7886304909560724,
      "grad_norm": 0.00918492428636694,
      "learning_rate": 1.1246654503176795e-06,
      "loss": 0.9351,
      "step": 763
    },
    {
      "epoch": 0.7896640826873385,
      "grad_norm": 0.010466158967119069,
      "learning_rate": 1.1140943655983727e-06,
      "loss": 0.8527,
      "step": 764
    },
    {
      "epoch": 0.7906976744186046,
      "grad_norm": 0.011487243429554032,
      "learning_rate": 1.103566963874052e-06,
      "loss": 0.8138,
      "step": 765
    },
    {
      "epoch": 0.7917312661498708,
      "grad_norm": 0.009996166008936876,
      "learning_rate": 1.0930833634873811e-06,
      "loss": 0.3519,
      "step": 766
    },
    {
      "epoch": 0.7927648578811369,
      "grad_norm": 0.010219568063372615,
      "learning_rate": 1.082643682288641e-06,
      "loss": 1.1793,
      "step": 767
    },
    {
      "epoch": 0.7937984496124031,
      "grad_norm": 0.011265870570672168,
      "learning_rate": 1.0722480376343997e-06,
      "loss": 0.5691,
      "step": 768
    },
    {
      "epoch": 0.7948320413436692,
      "grad_norm": 0.022784360993054982,
      "learning_rate": 1.0618965463861868e-06,
      "loss": 0.9045,
      "step": 769
    },
    {
      "epoch": 0.7958656330749354,
      "grad_norm": 0.046656981458534794,
      "learning_rate": 1.0515893249091936e-06,
      "loss": 0.6292,
      "step": 770
    },
    {
      "epoch": 0.7968992248062016,
      "grad_norm": 0.02666693125452592,
      "learning_rate": 1.0413264890709546e-06,
      "loss": 0.8287,
      "step": 771
    },
    {
      "epoch": 0.7979328165374677,
      "grad_norm": 0.003680981945469294,
      "learning_rate": 1.0311081542400452e-06,
      "loss": 0.5037,
      "step": 772
    },
    {
      "epoch": 0.7989664082687339,
      "grad_norm": 0.0033066332079855444,
      "learning_rate": 1.0209344352847923e-06,
      "loss": 1.0197,
      "step": 773
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.014462236556167561,
      "learning_rate": 1.0108054465719736e-06,
      "loss": 0.7989,
      "step": 774
    },
    {
      "epoch": 0.8010335917312662,
      "grad_norm": 0.07568178969913629,
      "learning_rate": 1.0007213019655393e-06,
      "loss": 0.6792,
      "step": 775
    },
    {
      "epoch": 0.8020671834625323,
      "grad_norm": 0.003341580679161255,
      "learning_rate": 9.906821148253303e-07,
      "loss": 0.3393,
      "step": 776
    },
    {
      "epoch": 0.8031007751937984,
      "grad_norm": 0.06218255374529516,
      "learning_rate": 9.806879980057993e-07,
      "loss": 0.9721,
      "step": 777
    },
    {
      "epoch": 0.8041343669250646,
      "grad_norm": 0.012734944914829896,
      "learning_rate": 9.707390638547482e-07,
      "loss": 0.6886,
      "step": 778
    },
    {
      "epoch": 0.8051679586563307,
      "grad_norm": 0.017415823154355903,
      "learning_rate": 9.608354242120637e-07,
      "loss": 0.8675,
      "step": 779
    },
    {
      "epoch": 0.8062015503875969,
      "grad_norm": 0.01481115217517737,
      "learning_rate": 9.509771904084558e-07,
      "loss": 0.9537,
      "step": 780
    },
    {
      "epoch": 0.807235142118863,
      "grad_norm": 0.013919248318989106,
      "learning_rate": 9.411644732642122e-07,
      "loss": 0.6679,
      "step": 781
    },
    {
      "epoch": 0.8082687338501292,
      "grad_norm": 0.010757304270507128,
      "learning_rate": 9.313973830879514e-07,
      "loss": 0.3185,
      "step": 782
    },
    {
      "epoch": 0.8093023255813954,
      "grad_norm": 0.1055068052841767,
      "learning_rate": 9.216760296753758e-07,
      "loss": 0.9586,
      "step": 783
    },
    {
      "epoch": 0.8103359173126615,
      "grad_norm": 0.021910880540130236,
      "learning_rate": 9.120005223080486e-07,
      "loss": 0.3148,
      "step": 784
    },
    {
      "epoch": 0.8113695090439277,
      "grad_norm": 0.008577024632026512,
      "learning_rate": 9.023709697521543e-07,
      "loss": 1.0179,
      "step": 785
    },
    {
      "epoch": 0.8124031007751938,
      "grad_norm": 0.012814300375320957,
      "learning_rate": 8.927874802572861e-07,
      "loss": 0.9413,
      "step": 786
    },
    {
      "epoch": 0.81343669250646,
      "grad_norm": 0.015832148531327805,
      "learning_rate": 8.832501615552225e-07,
      "loss": 1.0149,
      "step": 787
    },
    {
      "epoch": 0.814470284237726,
      "grad_norm": 0.05411335109792408,
      "learning_rate": 8.737591208587159e-07,
      "loss": 0.6907,
      "step": 788
    },
    {
      "epoch": 0.8155038759689922,
      "grad_norm": 0.008327991000698103,
      "learning_rate": 8.643144648602913e-07,
      "loss": 0.9588,
      "step": 789
    },
    {
      "epoch": 0.8165374677002584,
      "grad_norm": 0.005805417467512883,
      "learning_rate": 8.549162997310467e-07,
      "loss": 0.8366,
      "step": 790
    },
    {
      "epoch": 0.8175710594315245,
      "grad_norm": 0.001981105246704123,
      "learning_rate": 8.455647311194537e-07,
      "loss": 0.9282,
      "step": 791
    },
    {
      "epoch": 0.8186046511627907,
      "grad_norm": 0.016386493052963098,
      "learning_rate": 8.362598641501774e-07,
      "loss": 0.6792,
      "step": 792
    },
    {
      "epoch": 0.8196382428940568,
      "grad_norm": 0.008489317394146743,
      "learning_rate": 8.270018034228916e-07,
      "loss": 0.75,
      "step": 793
    },
    {
      "epoch": 0.820671834625323,
      "grad_norm": 0.028636949915044684,
      "learning_rate": 8.177906530110996e-07,
      "loss": 0.4912,
      "step": 794
    },
    {
      "epoch": 0.8217054263565892,
      "grad_norm": 0.020021342266682663,
      "learning_rate": 8.086265164609708e-07,
      "loss": 0.6465,
      "step": 795
    },
    {
      "epoch": 0.8227390180878553,
      "grad_norm": 0.014289254480364803,
      "learning_rate": 7.995094967901701e-07,
      "loss": 0.7983,
      "step": 796
    },
    {
      "epoch": 0.8237726098191215,
      "grad_norm": 0.14674912907929263,
      "learning_rate": 7.90439696486705e-07,
      "loss": 0.8931,
      "step": 797
    },
    {
      "epoch": 0.8248062015503876,
      "grad_norm": 0.26537833114021286,
      "learning_rate": 7.814172175077738e-07,
      "loss": 1.0798,
      "step": 798
    },
    {
      "epoch": 0.8258397932816538,
      "grad_norm": 0.0048491888263990915,
      "learning_rate": 7.724421612786109e-07,
      "loss": 0.7443,
      "step": 799
    },
    {
      "epoch": 0.8268733850129198,
      "grad_norm": 0.016797302732865286,
      "learning_rate": 7.635146286913587e-07,
      "loss": 0.6982,
      "step": 800
    },
    {
      "epoch": 0.827906976744186,
      "grad_norm": 0.009523641921786587,
      "learning_rate": 7.546347201039255e-07,
      "loss": 0.4177,
      "step": 801
    },
    {
      "epoch": 0.8289405684754522,
      "grad_norm": 0.041325202994119306,
      "learning_rate": 7.458025353388592e-07,
      "loss": 0.7391,
      "step": 802
    },
    {
      "epoch": 0.8299741602067183,
      "grad_norm": 0.014128068619299973,
      "learning_rate": 7.37018173682223e-07,
      "loss": 0.8808,
      "step": 803
    },
    {
      "epoch": 0.8310077519379845,
      "grad_norm": 0.02272205551762472,
      "learning_rate": 7.282817338824893e-07,
      "loss": 0.6888,
      "step": 804
    },
    {
      "epoch": 0.8320413436692506,
      "grad_norm": 0.003620897622866349,
      "learning_rate": 7.195933141494133e-07,
      "loss": 1.0926,
      "step": 805
    },
    {
      "epoch": 0.8330749354005168,
      "grad_norm": 0.04307457354640205,
      "learning_rate": 7.109530121529439e-07,
      "loss": 1.154,
      "step": 806
    },
    {
      "epoch": 0.834108527131783,
      "grad_norm": 0.0038982136430237672,
      "learning_rate": 7.023609250221153e-07,
      "loss": 0.7326,
      "step": 807
    },
    {
      "epoch": 0.8351421188630491,
      "grad_norm": 0.015118100917288258,
      "learning_rate": 6.938171493439622e-07,
      "loss": 1.1292,
      "step": 808
    },
    {
      "epoch": 0.8361757105943153,
      "grad_norm": 0.005336180984322705,
      "learning_rate": 6.853217811624313e-07,
      "loss": 0.7342,
      "step": 809
    },
    {
      "epoch": 0.8372093023255814,
      "grad_norm": 0.02259716156838843,
      "learning_rate": 6.768749159772992e-07,
      "loss": 0.9438,
      "step": 810
    },
    {
      "epoch": 0.8382428940568476,
      "grad_norm": 0.028913521621850513,
      "learning_rate": 6.684766487431027e-07,
      "loss": 0.7678,
      "step": 811
    },
    {
      "epoch": 0.8392764857881136,
      "grad_norm": 0.00029061638247003573,
      "learning_rate": 6.601270738680721e-07,
      "loss": 1.1905,
      "step": 812
    },
    {
      "epoch": 0.8403100775193798,
      "grad_norm": 0.016053911287338785,
      "learning_rate": 6.518262852130625e-07,
      "loss": 0.7511,
      "step": 813
    },
    {
      "epoch": 0.841343669250646,
      "grad_norm": 0.002582973296398741,
      "learning_rate": 6.435743760905083e-07,
      "loss": 0.879,
      "step": 814
    },
    {
      "epoch": 0.8423772609819121,
      "grad_norm": 0.06533950929556173,
      "learning_rate": 6.353714392633698e-07,
      "loss": 0.74,
      "step": 815
    },
    {
      "epoch": 0.8434108527131783,
      "grad_norm": 0.012808778374983931,
      "learning_rate": 6.272175669440861e-07,
      "loss": 0.6365,
      "step": 816
    },
    {
      "epoch": 0.8444444444444444,
      "grad_norm": 0.006707754566061283,
      "learning_rate": 6.191128507935479e-07,
      "loss": 0.5068,
      "step": 817
    },
    {
      "epoch": 0.8454780361757106,
      "grad_norm": 0.006694381675220189,
      "learning_rate": 6.110573819200605e-07,
      "loss": 0.952,
      "step": 818
    },
    {
      "epoch": 0.8465116279069768,
      "grad_norm": 0.006432108374091037,
      "learning_rate": 6.030512508783187e-07,
      "loss": 1.1633,
      "step": 819
    },
    {
      "epoch": 0.8475452196382429,
      "grad_norm": 0.03958940397784233,
      "learning_rate": 5.950945476683955e-07,
      "loss": 0.3847,
      "step": 820
    },
    {
      "epoch": 0.8485788113695091,
      "grad_norm": 0.018933218272065976,
      "learning_rate": 5.871873617347218e-07,
      "loss": 0.8753,
      "step": 821
    },
    {
      "epoch": 0.8496124031007752,
      "grad_norm": 0.011195394096158964,
      "learning_rate": 5.793297819650884e-07,
      "loss": 0.8899,
      "step": 822
    },
    {
      "epoch": 0.8506459948320414,
      "grad_norm": 0.022657171215932623,
      "learning_rate": 5.715218966896435e-07,
      "loss": 0.9801,
      "step": 823
    },
    {
      "epoch": 0.8516795865633074,
      "grad_norm": 0.014766916333780212,
      "learning_rate": 5.637637936798978e-07,
      "loss": 0.3904,
      "step": 824
    },
    {
      "epoch": 0.8527131782945736,
      "grad_norm": 0.016527499350431345,
      "learning_rate": 5.560555601477418e-07,
      "loss": 0.7574,
      "step": 825
    },
    {
      "epoch": 0.8537467700258398,
      "grad_norm": 0.012744925617249407,
      "learning_rate": 5.483972827444645e-07,
      "loss": 0.7223,
      "step": 826
    },
    {
      "epoch": 0.8547803617571059,
      "grad_norm": 0.032976619851138304,
      "learning_rate": 5.407890475597761e-07,
      "loss": 0.4226,
      "step": 827
    },
    {
      "epoch": 0.8558139534883721,
      "grad_norm": 0.04994385913120739,
      "learning_rate": 5.332309401208407e-07,
      "loss": 0.6357,
      "step": 828
    },
    {
      "epoch": 0.8568475452196382,
      "grad_norm": 0.005007714646051687,
      "learning_rate": 5.257230453913237e-07,
      "loss": 0.5771,
      "step": 829
    },
    {
      "epoch": 0.8578811369509044,
      "grad_norm": 0.015028844597875853,
      "learning_rate": 5.182654477704229e-07,
      "loss": 1.0524,
      "step": 830
    },
    {
      "epoch": 0.8589147286821706,
      "grad_norm": 0.001031345716606286,
      "learning_rate": 5.108582310919302e-07,
      "loss": 0.5849,
      "step": 831
    },
    {
      "epoch": 0.8599483204134367,
      "grad_norm": 0.025862677901814058,
      "learning_rate": 5.035014786232828e-07,
      "loss": 1.1943,
      "step": 832
    },
    {
      "epoch": 0.8609819121447029,
      "grad_norm": 0.002708473417464179,
      "learning_rate": 4.961952730646319e-07,
      "loss": 0.7797,
      "step": 833
    },
    {
      "epoch": 0.862015503875969,
      "grad_norm": 0.03392063243267157,
      "learning_rate": 4.889396965479115e-07,
      "loss": 0.4498,
      "step": 834
    },
    {
      "epoch": 0.8630490956072352,
      "grad_norm": 0.016233336877047963,
      "learning_rate": 4.817348306359121e-07,
      "loss": 1.1181,
      "step": 835
    },
    {
      "epoch": 0.8640826873385012,
      "grad_norm": 0.014932237824310404,
      "learning_rate": 4.745807563213678e-07,
      "loss": 1.1563,
      "step": 836
    },
    {
      "epoch": 0.8651162790697674,
      "grad_norm": 0.00850068425870214,
      "learning_rate": 4.6747755402604565e-07,
      "loss": 0.5335,
      "step": 837
    },
    {
      "epoch": 0.8661498708010336,
      "grad_norm": 0.02534395682997756,
      "learning_rate": 4.6042530359983793e-07,
      "loss": 0.7235,
      "step": 838
    },
    {
      "epoch": 0.8671834625322997,
      "grad_norm": 0.01298947679842675,
      "learning_rate": 4.534240843198662e-07,
      "loss": 0.7924,
      "step": 839
    },
    {
      "epoch": 0.8682170542635659,
      "grad_norm": 0.013827611763749843,
      "learning_rate": 4.464739748895963e-07,
      "loss": 0.7045,
      "step": 840
    },
    {
      "epoch": 0.869250645994832,
      "grad_norm": 0.015381110228690788,
      "learning_rate": 4.3957505343794115e-07,
      "loss": 0.5117,
      "step": 841
    },
    {
      "epoch": 0.8702842377260982,
      "grad_norm": 0.007115208455553591,
      "learning_rate": 4.327273975183949e-07,
      "loss": 0.851,
      "step": 842
    },
    {
      "epoch": 0.8713178294573644,
      "grad_norm": 0.005095526850858943,
      "learning_rate": 4.259310841081515e-07,
      "loss": 0.6649,
      "step": 843
    },
    {
      "epoch": 0.8723514211886305,
      "grad_norm": 0.06091932361131538,
      "learning_rate": 4.191861896072458e-07,
      "loss": 0.3351,
      "step": 844
    },
    {
      "epoch": 0.8733850129198967,
      "grad_norm": 0.003489852259926993,
      "learning_rate": 4.1249278983769405e-07,
      "loss": 0.9606,
      "step": 845
    },
    {
      "epoch": 0.8744186046511628,
      "grad_norm": 0.029484921532894476,
      "learning_rate": 4.058509600426358e-07,
      "loss": 0.7489,
      "step": 846
    },
    {
      "epoch": 0.875452196382429,
      "grad_norm": 0.014768787171775257,
      "learning_rate": 3.9926077488549543e-07,
      "loss": 0.3782,
      "step": 847
    },
    {
      "epoch": 0.876485788113695,
      "grad_norm": 0.0031785487939993452,
      "learning_rate": 3.9272230844913884e-07,
      "loss": 1.1407,
      "step": 848
    },
    {
      "epoch": 0.8775193798449612,
      "grad_norm": 0.003505630013563834,
      "learning_rate": 3.8623563423504094e-07,
      "loss": 0.9282,
      "step": 849
    },
    {
      "epoch": 0.8785529715762274,
      "grad_norm": 0.034334100747487196,
      "learning_rate": 3.798008251624585e-07,
      "loss": 0.4983,
      "step": 850
    },
    {
      "epoch": 0.8795865633074935,
      "grad_norm": 0.024597575936048263,
      "learning_rate": 3.734179535676169e-07,
      "loss": 0.9802,
      "step": 851
    },
    {
      "epoch": 0.8806201550387597,
      "grad_norm": 0.007125218404187427,
      "learning_rate": 3.6708709120288564e-07,
      "loss": 0.5697,
      "step": 852
    },
    {
      "epoch": 0.8816537467700258,
      "grad_norm": 0.003566392342948006,
      "learning_rate": 3.6080830923598266e-07,
      "loss": 0.4457,
      "step": 853
    },
    {
      "epoch": 0.882687338501292,
      "grad_norm": 0.00816864392447842,
      "learning_rate": 3.545816782491657e-07,
      "loss": 0.6061,
      "step": 854
    },
    {
      "epoch": 0.8837209302325582,
      "grad_norm": 0.014924361036411319,
      "learning_rate": 3.484072682384465e-07,
      "loss": 0.6283,
      "step": 855
    },
    {
      "epoch": 0.8847545219638243,
      "grad_norm": 0.013963665699759585,
      "learning_rate": 3.422851486127987e-07,
      "loss": 0.9217,
      "step": 856
    },
    {
      "epoch": 0.8857881136950905,
      "grad_norm": 0.06461927730201542,
      "learning_rate": 3.3621538819337776e-07,
      "loss": 0.6418,
      "step": 857
    },
    {
      "epoch": 0.8868217054263566,
      "grad_norm": 0.008408551995551511,
      "learning_rate": 3.301980552127509e-07,
      "loss": 0.3701,
      "step": 858
    },
    {
      "epoch": 0.8878552971576228,
      "grad_norm": 0.0176030485569631,
      "learning_rate": 3.2423321731412774e-07,
      "loss": 0.997,
      "step": 859
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.019747480271695275,
      "learning_rate": 3.1832094155059776e-07,
      "loss": 1.024,
      "step": 860
    },
    {
      "epoch": 0.889922480620155,
      "grad_norm": 0.02173325469272559,
      "learning_rate": 3.1246129438438076e-07,
      "loss": 0.7247,
      "step": 861
    },
    {
      "epoch": 0.8909560723514212,
      "grad_norm": 0.016763966393016725,
      "learning_rate": 3.0665434168607846e-07,
      "loss": 0.5944,
      "step": 862
    },
    {
      "epoch": 0.8919896640826873,
      "grad_norm": 0.025673284776176798,
      "learning_rate": 3.009001487339308e-07,
      "loss": 0.5603,
      "step": 863
    },
    {
      "epoch": 0.8930232558139535,
      "grad_norm": 0.0033186704502830987,
      "learning_rate": 2.9519878021308624e-07,
      "loss": 0.3139,
      "step": 864
    },
    {
      "epoch": 0.8940568475452196,
      "grad_norm": 0.004701957563556191,
      "learning_rate": 2.8955030021487254e-07,
      "loss": 0.9224,
      "step": 865
    },
    {
      "epoch": 0.8950904392764858,
      "grad_norm": 0.0009519877852949781,
      "learning_rate": 2.839547722360769e-07,
      "loss": 0.4499,
      "step": 866
    },
    {
      "epoch": 0.896124031007752,
      "grad_norm": 0.007616935306015169,
      "learning_rate": 2.7841225917823347e-07,
      "loss": 0.8934,
      "step": 867
    },
    {
      "epoch": 0.8971576227390181,
      "grad_norm": 0.00705386806399859,
      "learning_rate": 2.7292282334691167e-07,
      "loss": 0.6063,
      "step": 868
    },
    {
      "epoch": 0.8981912144702843,
      "grad_norm": 0.0052992349458277676,
      "learning_rate": 2.674865264510218e-07,
      "loss": 1.0514,
      "step": 869
    },
    {
      "epoch": 0.8992248062015504,
      "grad_norm": 0.07335714207068075,
      "learning_rate": 2.6210342960211744e-07,
      "loss": 0.9202,
      "step": 870
    },
    {
      "epoch": 0.9002583979328166,
      "grad_norm": 0.019888721699752896,
      "learning_rate": 2.5677359331370834e-07,
      "loss": 0.93,
      "step": 871
    },
    {
      "epoch": 0.9012919896640826,
      "grad_norm": 0.017975591933389424,
      "learning_rate": 2.5149707750058316e-07,
      "loss": 0.6725,
      "step": 872
    },
    {
      "epoch": 0.9023255813953488,
      "grad_norm": 0.004427766422287238,
      "learning_rate": 2.462739414781334e-07,
      "loss": 0.8811,
      "step": 873
    },
    {
      "epoch": 0.903359173126615,
      "grad_norm": 0.0036673258697909666,
      "learning_rate": 2.411042439616873e-07,
      "loss": 0.3816,
      "step": 874
    },
    {
      "epoch": 0.9043927648578811,
      "grad_norm": 0.008558097413778366,
      "learning_rate": 2.3598804306584843e-07,
      "loss": 0.6527,
      "step": 875
    },
    {
      "epoch": 0.9054263565891473,
      "grad_norm": 0.00774908985466421,
      "learning_rate": 2.309253963038477e-07,
      "loss": 1.0971,
      "step": 876
    },
    {
      "epoch": 0.9064599483204134,
      "grad_norm": 0.0166768381847291,
      "learning_rate": 2.2591636058688804e-07,
      "loss": 0.4881,
      "step": 877
    },
    {
      "epoch": 0.9074935400516796,
      "grad_norm": 0.008403315138525429,
      "learning_rate": 2.2096099222351343e-07,
      "loss": 0.4911,
      "step": 878
    },
    {
      "epoch": 0.9085271317829458,
      "grad_norm": 0.006970951675827918,
      "learning_rate": 2.1605934691896868e-07,
      "loss": 0.9267,
      "step": 879
    },
    {
      "epoch": 0.9095607235142119,
      "grad_norm": 0.0005730256951126856,
      "learning_rate": 2.1121147977457956e-07,
      "loss": 0.4195,
      "step": 880
    },
    {
      "epoch": 0.9105943152454781,
      "grad_norm": 0.018247063452914228,
      "learning_rate": 2.0641744528712925e-07,
      "loss": 0.8503,
      "step": 881
    },
    {
      "epoch": 0.9116279069767442,
      "grad_norm": 0.01197171671411708,
      "learning_rate": 2.0167729734824558e-07,
      "loss": 0.9021,
      "step": 882
    },
    {
      "epoch": 0.9126614987080104,
      "grad_norm": 0.012930664657020472,
      "learning_rate": 1.9699108924379818e-07,
      "loss": 0.9021,
      "step": 883
    },
    {
      "epoch": 0.9136950904392764,
      "grad_norm": 0.0046163161371517315,
      "learning_rate": 1.9235887365329774e-07,
      "loss": 1.2899,
      "step": 884
    },
    {
      "epoch": 0.9147286821705426,
      "grad_norm": 0.0026019269542379418,
      "learning_rate": 1.877807026493028e-07,
      "loss": 0.9028,
      "step": 885
    },
    {
      "epoch": 0.9157622739018088,
      "grad_norm": 0.02124956737100471,
      "learning_rate": 1.832566276968345e-07,
      "loss": 0.439,
      "step": 886
    },
    {
      "epoch": 0.9167958656330749,
      "grad_norm": 0.0017471115189419428,
      "learning_rate": 1.7878669965280315e-07,
      "loss": 1.4184,
      "step": 887
    },
    {
      "epoch": 0.9178294573643411,
      "grad_norm": 0.017786935518014153,
      "learning_rate": 1.7437096876542713e-07,
      "loss": 0.3844,
      "step": 888
    },
    {
      "epoch": 0.9188630490956072,
      "grad_norm": 0.007530570540772561,
      "learning_rate": 1.7000948467367718e-07,
      "loss": 0.7232,
      "step": 889
    },
    {
      "epoch": 0.9198966408268734,
      "grad_norm": 0.006845777829943065,
      "learning_rate": 1.657022964067112e-07,
      "loss": 0.714,
      "step": 890
    },
    {
      "epoch": 0.9209302325581395,
      "grad_norm": 0.039153961682722595,
      "learning_rate": 1.6144945238332987e-07,
      "loss": 0.5794,
      "step": 891
    },
    {
      "epoch": 0.9219638242894057,
      "grad_norm": 0.007183033221697448,
      "learning_rate": 1.5725100041142694e-07,
      "loss": 1.0499,
      "step": 892
    },
    {
      "epoch": 0.9229974160206719,
      "grad_norm": 0.0021226795285977686,
      "learning_rate": 1.5310698768745247e-07,
      "loss": 0.5968,
      "step": 893
    },
    {
      "epoch": 0.924031007751938,
      "grad_norm": 0.004168027621043818,
      "learning_rate": 1.4901746079588552e-07,
      "loss": 0.6378,
      "step": 894
    },
    {
      "epoch": 0.9250645994832042,
      "grad_norm": 0.021641177211593483,
      "learning_rate": 1.4498246570870843e-07,
      "loss": 1.3117,
      "step": 895
    },
    {
      "epoch": 0.9260981912144702,
      "grad_norm": 0.004461097199894616,
      "learning_rate": 1.4100204778488947e-07,
      "loss": 1.1212,
      "step": 896
    },
    {
      "epoch": 0.9271317829457364,
      "grad_norm": 0.005901342952041752,
      "learning_rate": 1.370762517698715e-07,
      "loss": 0.6615,
      "step": 897
    },
    {
      "epoch": 0.9281653746770026,
      "grad_norm": 0.009872960379135575,
      "learning_rate": 1.3320512179507528e-07,
      "loss": 0.6646,
      "step": 898
    },
    {
      "epoch": 0.9291989664082687,
      "grad_norm": 0.018956571190745845,
      "learning_rate": 1.293887013773959e-07,
      "loss": 0.3638,
      "step": 899
    },
    {
      "epoch": 0.9302325581395349,
      "grad_norm": 0.015327747746304271,
      "learning_rate": 1.2562703341871708e-07,
      "loss": 0.8464,
      "step": 900
    },
    {
      "epoch": 0.931266149870801,
      "grad_norm": 0.01849953182596782,
      "learning_rate": 1.2192016020542986e-07,
      "loss": 0.8028,
      "step": 901
    },
    {
      "epoch": 0.9322997416020672,
      "grad_norm": 0.07084982718920227,
      "learning_rate": 1.1826812340795524e-07,
      "loss": 0.9176,
      "step": 902
    },
    {
      "epoch": 0.9333333333333333,
      "grad_norm": 0.012396527832198792,
      "learning_rate": 1.1467096408027678e-07,
      "loss": 0.7654,
      "step": 903
    },
    {
      "epoch": 0.9343669250645995,
      "grad_norm": 0.004315964231516348,
      "learning_rate": 1.1112872265947816e-07,
      "loss": 0.5223,
      "step": 904
    },
    {
      "epoch": 0.9354005167958657,
      "grad_norm": 0.012481091170417579,
      "learning_rate": 1.0764143896528967e-07,
      "loss": 0.8383,
      "step": 905
    },
    {
      "epoch": 0.9364341085271318,
      "grad_norm": 0.011503841014764074,
      "learning_rate": 1.0420915219964023e-07,
      "loss": 1.3222,
      "step": 906
    },
    {
      "epoch": 0.937467700258398,
      "grad_norm": 0.022320651405682183,
      "learning_rate": 1.008319009462172e-07,
      "loss": 0.438,
      "step": 907
    },
    {
      "epoch": 0.938501291989664,
      "grad_norm": 0.0036228400061420712,
      "learning_rate": 9.75097231700295e-08,
      "loss": 1.2219,
      "step": 908
    },
    {
      "epoch": 0.9395348837209302,
      "grad_norm": 0.0073199987840382105,
      "learning_rate": 9.424265621698736e-08,
      "loss": 0.7794,
      "step": 909
    },
    {
      "epoch": 0.9405684754521964,
      "grad_norm": 0.11927842003235607,
      "learning_rate": 9.103073681347607e-08,
      "loss": 1.1147,
      "step": 910
    },
    {
      "epoch": 0.9416020671834625,
      "grad_norm": 0.03367690794285895,
      "learning_rate": 8.787400106594568e-08,
      "loss": 0.6853,
      "step": 911
    },
    {
      "epoch": 0.9426356589147287,
      "grad_norm": 0.10401954250006279,
      "learning_rate": 8.477248446050523e-08,
      "loss": 1.2952,
      "step": 912
    },
    {
      "epoch": 0.9436692506459948,
      "grad_norm": 0.006305920032169265,
      "learning_rate": 8.172622186252421e-08,
      "loss": 0.4384,
      "step": 913
    },
    {
      "epoch": 0.944702842377261,
      "grad_norm": 0.007534496598198271,
      "learning_rate": 7.873524751624006e-08,
      "loss": 0.7177,
      "step": 914
    },
    {
      "epoch": 0.9457364341085271,
      "grad_norm": 0.024664413834251095,
      "learning_rate": 7.579959504437184e-08,
      "loss": 0.7263,
      "step": 915
    },
    {
      "epoch": 0.9467700258397933,
      "grad_norm": 0.0072192615067336085,
      "learning_rate": 7.291929744774495e-08,
      "loss": 0.9194,
      "step": 916
    },
    {
      "epoch": 0.9478036175710595,
      "grad_norm": 0.003950462404790232,
      "learning_rate": 7.009438710491978e-08,
      "loss": 0.8107,
      "step": 917
    },
    {
      "epoch": 0.9488372093023256,
      "grad_norm": 0.03059717372345069,
      "learning_rate": 6.732489577182422e-08,
      "loss": 0.7147,
      "step": 918
    },
    {
      "epoch": 0.9498708010335918,
      "grad_norm": 0.009341006625160806,
      "learning_rate": 6.461085458140059e-08,
      "loss": 0.4426,
      "step": 919
    },
    {
      "epoch": 0.9509043927648578,
      "grad_norm": 0.0043964179443133425,
      "learning_rate": 6.195229404325542e-08,
      "loss": 0.4205,
      "step": 920
    },
    {
      "epoch": 0.951937984496124,
      "grad_norm": 0.004769251164524089,
      "learning_rate": 5.934924404331355e-08,
      "loss": 0.8856,
      "step": 921
    },
    {
      "epoch": 0.9529715762273901,
      "grad_norm": 0.004518669489301304,
      "learning_rate": 5.680173384348453e-08,
      "loss": 0.6484,
      "step": 922
    },
    {
      "epoch": 0.9540051679586563,
      "grad_norm": 0.013592575893929818,
      "learning_rate": 5.4309792081334024e-08,
      "loss": 0.8035,
      "step": 923
    },
    {
      "epoch": 0.9550387596899225,
      "grad_norm": 0.01614294733036871,
      "learning_rate": 5.187344676976014e-08,
      "loss": 0.6429,
      "step": 924
    },
    {
      "epoch": 0.9560723514211886,
      "grad_norm": 0.009737472152470962,
      "learning_rate": 4.949272529667926e-08,
      "loss": 0.6036,
      "step": 925
    },
    {
      "epoch": 0.9571059431524548,
      "grad_norm": 0.00042279291177400066,
      "learning_rate": 4.716765442471849e-08,
      "loss": 0.4076,
      "step": 926
    },
    {
      "epoch": 0.958139534883721,
      "grad_norm": 0.013887568425627873,
      "learning_rate": 4.489826029091593e-08,
      "loss": 0.6691,
      "step": 927
    },
    {
      "epoch": 0.9591731266149871,
      "grad_norm": 0.0035180842897300233,
      "learning_rate": 4.2684568406423656e-08,
      "loss": 0.7958,
      "step": 928
    },
    {
      "epoch": 0.9602067183462533,
      "grad_norm": 0.013217463388377846,
      "learning_rate": 4.0526603656223515e-08,
      "loss": 0.6344,
      "step": 929
    },
    {
      "epoch": 0.9612403100775194,
      "grad_norm": 0.00954380519628361,
      "learning_rate": 3.8424390298846815e-08,
      "loss": 0.7935,
      "step": 930
    },
    {
      "epoch": 0.9622739018087856,
      "grad_norm": 0.007291903217675602,
      "learning_rate": 3.637795196610228e-08,
      "loss": 0.5884,
      "step": 931
    },
    {
      "epoch": 0.9633074935400516,
      "grad_norm": 0.08128252124030343,
      "learning_rate": 3.4387311662807396e-08,
      "loss": 0.6594,
      "step": 932
    },
    {
      "epoch": 0.9643410852713178,
      "grad_norm": 0.00870948046461966,
      "learning_rate": 3.24524917665342e-08,
      "loss": 0.5233,
      "step": 933
    },
    {
      "epoch": 0.965374677002584,
      "grad_norm": 0.026861522484950367,
      "learning_rate": 3.0573514027355535e-08,
      "loss": 0.9255,
      "step": 934
    },
    {
      "epoch": 0.9664082687338501,
      "grad_norm": 0.018175518662060325,
      "learning_rate": 2.8750399567599174e-08,
      "loss": 1.0592,
      "step": 935
    },
    {
      "epoch": 0.9674418604651163,
      "grad_norm": 0.021824483921169483,
      "learning_rate": 2.6983168881611897e-08,
      "loss": 1.0364,
      "step": 936
    },
    {
      "epoch": 0.9684754521963824,
      "grad_norm": 0.0034176928651543865,
      "learning_rate": 2.527184183553022e-08,
      "loss": 0.5663,
      "step": 937
    },
    {
      "epoch": 0.9695090439276486,
      "grad_norm": 0.013964083239375676,
      "learning_rate": 2.3616437667055014e-08,
      "loss": 0.4137,
      "step": 938
    },
    {
      "epoch": 0.9705426356589147,
      "grad_norm": 0.0073981665646462,
      "learning_rate": 2.2016974985236695e-08,
      "loss": 0.774,
      "step": 939
    },
    {
      "epoch": 0.9715762273901809,
      "grad_norm": 0.00433101610873677,
      "learning_rate": 2.047347177026371e-08,
      "loss": 0.4434,
      "step": 940
    },
    {
      "epoch": 0.9726098191214471,
      "grad_norm": 0.027213731926810567,
      "learning_rate": 1.898594537326437e-08,
      "loss": 0.9188,
      "step": 941
    },
    {
      "epoch": 0.9736434108527132,
      "grad_norm": 0.0023400363883771804,
      "learning_rate": 1.7554412516108678e-08,
      "loss": 0.3658,
      "step": 942
    },
    {
      "epoch": 0.9746770025839794,
      "grad_norm": 0.0043012616023975665,
      "learning_rate": 1.6178889291220135e-08,
      "loss": 0.7917,
      "step": 943
    },
    {
      "epoch": 0.9757105943152454,
      "grad_norm": 0.08186628326443109,
      "learning_rate": 1.4859391161397008e-08,
      "loss": 0.866,
      "step": 944
    },
    {
      "epoch": 0.9767441860465116,
      "grad_norm": 0.013080369204593514,
      "learning_rate": 1.3595932959638015e-08,
      "loss": 0.6698,
      "step": 945
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 0.013993109593360257,
      "learning_rate": 1.2388528888973017e-08,
      "loss": 0.9159,
      "step": 946
    },
    {
      "epoch": 0.9788113695090439,
      "grad_norm": 0.050821436136727384,
      "learning_rate": 1.1237192522307594e-08,
      "loss": 1.1753,
      "step": 947
    },
    {
      "epoch": 0.9798449612403101,
      "grad_norm": 0.10342116525422385,
      "learning_rate": 1.014193680226594e-08,
      "loss": 0.6903,
      "step": 948
    },
    {
      "epoch": 0.9808785529715762,
      "grad_norm": 0.004117476356494686,
      "learning_rate": 9.102774041049867e-09,
      "loss": 0.5884,
      "step": 949
    },
    {
      "epoch": 0.9819121447028424,
      "grad_norm": 0.027134346958036265,
      "learning_rate": 8.119715920296145e-09,
      "loss": 0.8897,
      "step": 950
    },
    {
      "epoch": 0.9829457364341085,
      "grad_norm": 0.032197020118717036,
      "learning_rate": 7.1927734909488235e-09,
      "loss": 0.659,
      "step": 951
    },
    {
      "epoch": 0.9839793281653747,
      "grad_norm": 0.008070785052535724,
      "learning_rate": 6.321957173132665e-09,
      "loss": 0.3277,
      "step": 952
    },
    {
      "epoch": 0.9850129198966409,
      "grad_norm": 0.0015410588290119592,
      "learning_rate": 5.507276756036018e-09,
      "loss": 0.7086,
      "step": 953
    },
    {
      "epoch": 0.986046511627907,
      "grad_norm": 0.01084411952690762,
      "learning_rate": 4.74874139780257e-09,
      "loss": 0.5152,
      "step": 954
    },
    {
      "epoch": 0.9870801033591732,
      "grad_norm": 0.003021167736429101,
      "learning_rate": 4.046359625426988e-09,
      "loss": 0.4238,
      "step": 955
    },
    {
      "epoch": 0.9881136950904392,
      "grad_norm": 0.02453880416385955,
      "learning_rate": 3.400139334658881e-09,
      "loss": 0.8515,
      "step": 956
    },
    {
      "epoch": 0.9891472868217054,
      "grad_norm": 0.009371908593720371,
      "learning_rate": 2.81008778991565e-09,
      "loss": 0.4927,
      "step": 957
    },
    {
      "epoch": 0.9901808785529715,
      "grad_norm": 0.01848523119576953,
      "learning_rate": 2.27621162419811e-09,
      "loss": 1.0906,
      "step": 958
    },
    {
      "epoch": 0.9912144702842377,
      "grad_norm": 0.030507973499769067,
      "learning_rate": 1.7985168390194375e-09,
      "loss": 0.651,
      "step": 959
    },
    {
      "epoch": 0.9922480620155039,
      "grad_norm": 0.019491545523596474,
      "learning_rate": 1.3770088043335573e-09,
      "loss": 0.6537,
      "step": 960
    },
    {
      "epoch": 0.99328165374677,
      "grad_norm": 0.007849238896423888,
      "learning_rate": 1.01169225847908e-09,
      "loss": 0.6244,
      "step": 961
    },
    {
      "epoch": 0.9943152454780362,
      "grad_norm": 0.011303433638736843,
      "learning_rate": 7.025713081232343e-10,
      "loss": 0.6003,
      "step": 962
    },
    {
      "epoch": 0.9953488372093023,
      "grad_norm": 0.013983809429452264,
      "learning_rate": 4.496494282157926e-10,
      "loss": 0.9153,
      "step": 963
    },
    {
      "epoch": 0.9963824289405685,
      "grad_norm": 0.03675942490677278,
      "learning_rate": 2.529294619513234e-10,
      "loss": 0.5355,
      "step": 964
    },
    {
      "epoch": 0.9974160206718347,
      "grad_norm": 0.004266314110313392,
      "learning_rate": 1.1241362073588502e-10,
      "loss": 0.9572,
      "step": 965
    },
    {
      "epoch": 0.9984496124031008,
      "grad_norm": 0.02075004628400116,
      "learning_rate": 2.8103484164820894e-11,
      "loss": 0.4781,
      "step": 966
    },
    {
      "epoch": 0.999483204134367,
      "grad_norm": 0.02371927028225392,
      "learning_rate": 0.0,
      "loss": 0.8601,
      "step": 967
    },
    {
      "epoch": 0.999483204134367,
      "step": 967,
      "total_flos": 99861212291072.0,
      "train_loss": 0.7226208712840154,
      "train_runtime": 77677.7691,
      "train_samples_per_second": 0.05,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 967,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 99861212291072.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
