VideoChat-Flash-Qwen2_5-2B_.../trainer_state.json

110519 lines
2.4 MiB

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 15784,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 9.532355308532715,
"learning_rate": 2.109704641350211e-08,
"loss": 1.3252,
"step": 1
},
{
"epoch": 0.0,
"grad_norm": 9.528098106384277,
"learning_rate": 4.219409282700422e-08,
"loss": 1.3375,
"step": 2
},
{
"epoch": 0.0,
"grad_norm": 9.426416397094727,
"learning_rate": 6.329113924050633e-08,
"loss": 1.3024,
"step": 3
},
{
"epoch": 0.0,
"grad_norm": 8.792394638061523,
"learning_rate": 8.438818565400844e-08,
"loss": 1.2546,
"step": 4
},
{
"epoch": 0.0,
"grad_norm": 9.00940227508545,
"learning_rate": 1.0548523206751055e-07,
"loss": 1.2509,
"step": 5
},
{
"epoch": 0.0,
"grad_norm": 9.64754867553711,
"learning_rate": 1.2658227848101266e-07,
"loss": 1.3443,
"step": 6
},
{
"epoch": 0.0,
"grad_norm": 8.994702339172363,
"learning_rate": 1.4767932489451477e-07,
"loss": 1.3474,
"step": 7
},
{
"epoch": 0.0,
"grad_norm": 9.668071746826172,
"learning_rate": 1.6877637130801689e-07,
"loss": 1.2868,
"step": 8
},
{
"epoch": 0.0,
"grad_norm": 10.72716999053955,
"learning_rate": 1.89873417721519e-07,
"loss": 1.3352,
"step": 9
},
{
"epoch": 0.0,
"grad_norm": 9.297769546508789,
"learning_rate": 2.109704641350211e-07,
"loss": 1.3187,
"step": 10
},
{
"epoch": 0.0,
"grad_norm": 10.302685737609863,
"learning_rate": 2.3206751054852324e-07,
"loss": 1.2814,
"step": 11
},
{
"epoch": 0.0,
"grad_norm": 9.82099437713623,
"learning_rate": 2.5316455696202533e-07,
"loss": 1.3105,
"step": 12
},
{
"epoch": 0.0,
"grad_norm": 9.666701316833496,
"learning_rate": 2.7426160337552746e-07,
"loss": 1.3054,
"step": 13
},
{
"epoch": 0.0,
"grad_norm": 8.828336715698242,
"learning_rate": 2.9535864978902955e-07,
"loss": 1.2671,
"step": 14
},
{
"epoch": 0.0,
"grad_norm": 9.0466890335083,
"learning_rate": 3.164556962025317e-07,
"loss": 1.3082,
"step": 15
},
{
"epoch": 0.0,
"grad_norm": 10.455029487609863,
"learning_rate": 3.3755274261603377e-07,
"loss": 1.3541,
"step": 16
},
{
"epoch": 0.0,
"grad_norm": 9.330451965332031,
"learning_rate": 3.586497890295359e-07,
"loss": 1.3523,
"step": 17
},
{
"epoch": 0.0,
"grad_norm": 9.279951095581055,
"learning_rate": 3.79746835443038e-07,
"loss": 1.3471,
"step": 18
},
{
"epoch": 0.0,
"grad_norm": 9.216424942016602,
"learning_rate": 4.0084388185654013e-07,
"loss": 1.2502,
"step": 19
},
{
"epoch": 0.0,
"grad_norm": 9.830771446228027,
"learning_rate": 4.219409282700422e-07,
"loss": 1.3262,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 8.596817970275879,
"learning_rate": 4.4303797468354435e-07,
"loss": 1.2861,
"step": 21
},
{
"epoch": 0.0,
"grad_norm": 7.879246711730957,
"learning_rate": 4.641350210970465e-07,
"loss": 1.3028,
"step": 22
},
{
"epoch": 0.0,
"grad_norm": 7.26005744934082,
"learning_rate": 4.852320675105486e-07,
"loss": 1.1705,
"step": 23
},
{
"epoch": 0.0,
"grad_norm": 8.682889938354492,
"learning_rate": 5.063291139240507e-07,
"loss": 1.2799,
"step": 24
},
{
"epoch": 0.0,
"grad_norm": 7.49697732925415,
"learning_rate": 5.274261603375528e-07,
"loss": 1.2697,
"step": 25
},
{
"epoch": 0.0,
"grad_norm": 8.21834945678711,
"learning_rate": 5.485232067510549e-07,
"loss": 1.3346,
"step": 26
},
{
"epoch": 0.0,
"grad_norm": 7.122488021850586,
"learning_rate": 5.69620253164557e-07,
"loss": 1.2485,
"step": 27
},
{
"epoch": 0.0,
"grad_norm": 7.122411727905273,
"learning_rate": 5.907172995780591e-07,
"loss": 1.2263,
"step": 28
},
{
"epoch": 0.0,
"grad_norm": 7.260400295257568,
"learning_rate": 6.118143459915613e-07,
"loss": 1.2086,
"step": 29
},
{
"epoch": 0.0,
"grad_norm": 6.424813270568848,
"learning_rate": 6.329113924050634e-07,
"loss": 1.216,
"step": 30
},
{
"epoch": 0.0,
"grad_norm": 6.159492492675781,
"learning_rate": 6.540084388185656e-07,
"loss": 1.1744,
"step": 31
},
{
"epoch": 0.0,
"grad_norm": 6.250075817108154,
"learning_rate": 6.751054852320675e-07,
"loss": 1.2486,
"step": 32
},
{
"epoch": 0.0,
"grad_norm": 6.240837097167969,
"learning_rate": 6.962025316455696e-07,
"loss": 1.1966,
"step": 33
},
{
"epoch": 0.0,
"grad_norm": 6.257637023925781,
"learning_rate": 7.172995780590718e-07,
"loss": 1.2206,
"step": 34
},
{
"epoch": 0.0,
"grad_norm": 6.090736389160156,
"learning_rate": 7.383966244725739e-07,
"loss": 1.1555,
"step": 35
},
{
"epoch": 0.0,
"grad_norm": 6.092897891998291,
"learning_rate": 7.59493670886076e-07,
"loss": 1.2119,
"step": 36
},
{
"epoch": 0.0,
"grad_norm": 5.601813793182373,
"learning_rate": 7.805907172995782e-07,
"loss": 1.2041,
"step": 37
},
{
"epoch": 0.0,
"grad_norm": 5.4696221351623535,
"learning_rate": 8.016877637130803e-07,
"loss": 1.0983,
"step": 38
},
{
"epoch": 0.0,
"grad_norm": 5.418956756591797,
"learning_rate": 8.227848101265823e-07,
"loss": 1.1278,
"step": 39
},
{
"epoch": 0.0,
"grad_norm": 4.913137912750244,
"learning_rate": 8.438818565400844e-07,
"loss": 1.0996,
"step": 40
},
{
"epoch": 0.0,
"grad_norm": 3.8683886528015137,
"learning_rate": 8.649789029535865e-07,
"loss": 1.1214,
"step": 41
},
{
"epoch": 0.0,
"grad_norm": 3.4708452224731445,
"learning_rate": 8.860759493670887e-07,
"loss": 1.0712,
"step": 42
},
{
"epoch": 0.0,
"grad_norm": 3.7991111278533936,
"learning_rate": 9.071729957805908e-07,
"loss": 1.0548,
"step": 43
},
{
"epoch": 0.0,
"grad_norm": 3.196794033050537,
"learning_rate": 9.28270042194093e-07,
"loss": 1.0285,
"step": 44
},
{
"epoch": 0.0,
"grad_norm": 3.9760518074035645,
"learning_rate": 9.493670886075951e-07,
"loss": 1.0126,
"step": 45
},
{
"epoch": 0.0,
"grad_norm": 3.435934066772461,
"learning_rate": 9.704641350210971e-07,
"loss": 1.0275,
"step": 46
},
{
"epoch": 0.0,
"grad_norm": 2.9900312423706055,
"learning_rate": 9.915611814345991e-07,
"loss": 0.9641,
"step": 47
},
{
"epoch": 0.0,
"grad_norm": 3.0222251415252686,
"learning_rate": 1.0126582278481013e-06,
"loss": 0.9919,
"step": 48
},
{
"epoch": 0.0,
"grad_norm": 2.916668653488159,
"learning_rate": 1.0337552742616035e-06,
"loss": 1.0351,
"step": 49
},
{
"epoch": 0.0,
"grad_norm": 3.182502508163452,
"learning_rate": 1.0548523206751057e-06,
"loss": 1.0153,
"step": 50
},
{
"epoch": 0.0,
"grad_norm": 2.9524974822998047,
"learning_rate": 1.0759493670886077e-06,
"loss": 0.9676,
"step": 51
},
{
"epoch": 0.0,
"grad_norm": 2.7479350566864014,
"learning_rate": 1.0970464135021099e-06,
"loss": 1.0005,
"step": 52
},
{
"epoch": 0.0,
"grad_norm": 1.9719606637954712,
"learning_rate": 1.1181434599156118e-06,
"loss": 1.0069,
"step": 53
},
{
"epoch": 0.0,
"grad_norm": 2.0742597579956055,
"learning_rate": 1.139240506329114e-06,
"loss": 0.9551,
"step": 54
},
{
"epoch": 0.0,
"grad_norm": 2.005128860473633,
"learning_rate": 1.1603375527426162e-06,
"loss": 0.9297,
"step": 55
},
{
"epoch": 0.0,
"grad_norm": 1.920249342918396,
"learning_rate": 1.1814345991561182e-06,
"loss": 0.878,
"step": 56
},
{
"epoch": 0.0,
"grad_norm": 1.9409500360488892,
"learning_rate": 1.2025316455696204e-06,
"loss": 0.9166,
"step": 57
},
{
"epoch": 0.0,
"grad_norm": 1.9121850728988647,
"learning_rate": 1.2236286919831226e-06,
"loss": 0.9379,
"step": 58
},
{
"epoch": 0.0,
"grad_norm": 1.7869422435760498,
"learning_rate": 1.2447257383966246e-06,
"loss": 0.9852,
"step": 59
},
{
"epoch": 0.0,
"grad_norm": 1.9101088047027588,
"learning_rate": 1.2658227848101267e-06,
"loss": 0.918,
"step": 60
},
{
"epoch": 0.0,
"grad_norm": 1.8155966997146606,
"learning_rate": 1.286919831223629e-06,
"loss": 0.8639,
"step": 61
},
{
"epoch": 0.0,
"grad_norm": 1.7884961366653442,
"learning_rate": 1.3080168776371311e-06,
"loss": 0.8911,
"step": 62
},
{
"epoch": 0.0,
"grad_norm": 1.5779091119766235,
"learning_rate": 1.3291139240506329e-06,
"loss": 0.8864,
"step": 63
},
{
"epoch": 0.0,
"grad_norm": 1.529002070426941,
"learning_rate": 1.350210970464135e-06,
"loss": 0.8961,
"step": 64
},
{
"epoch": 0.0,
"grad_norm": 1.548957109451294,
"learning_rate": 1.371308016877637e-06,
"loss": 0.8673,
"step": 65
},
{
"epoch": 0.0,
"grad_norm": 1.5970193147659302,
"learning_rate": 1.3924050632911392e-06,
"loss": 0.8815,
"step": 66
},
{
"epoch": 0.0,
"grad_norm": 1.5011041164398193,
"learning_rate": 1.4135021097046414e-06,
"loss": 0.889,
"step": 67
},
{
"epoch": 0.0,
"grad_norm": 1.4582399129867554,
"learning_rate": 1.4345991561181436e-06,
"loss": 0.878,
"step": 68
},
{
"epoch": 0.0,
"grad_norm": 1.4272208213806152,
"learning_rate": 1.4556962025316456e-06,
"loss": 0.8489,
"step": 69
},
{
"epoch": 0.0,
"grad_norm": 1.435996651649475,
"learning_rate": 1.4767932489451478e-06,
"loss": 0.9408,
"step": 70
},
{
"epoch": 0.0,
"grad_norm": 1.4072030782699585,
"learning_rate": 1.49789029535865e-06,
"loss": 0.8805,
"step": 71
},
{
"epoch": 0.0,
"grad_norm": 1.4391857385635376,
"learning_rate": 1.518987341772152e-06,
"loss": 0.881,
"step": 72
},
{
"epoch": 0.0,
"grad_norm": 1.229433298110962,
"learning_rate": 1.5400843881856542e-06,
"loss": 0.8213,
"step": 73
},
{
"epoch": 0.0,
"grad_norm": 1.2908027172088623,
"learning_rate": 1.5611814345991563e-06,
"loss": 0.8103,
"step": 74
},
{
"epoch": 0.0,
"grad_norm": 1.3904271125793457,
"learning_rate": 1.5822784810126585e-06,
"loss": 0.8272,
"step": 75
},
{
"epoch": 0.0,
"grad_norm": 1.211230754852295,
"learning_rate": 1.6033755274261605e-06,
"loss": 0.8327,
"step": 76
},
{
"epoch": 0.0,
"grad_norm": 1.340195655822754,
"learning_rate": 1.6244725738396625e-06,
"loss": 0.8251,
"step": 77
},
{
"epoch": 0.0,
"grad_norm": 1.2882392406463623,
"learning_rate": 1.6455696202531647e-06,
"loss": 0.8505,
"step": 78
},
{
"epoch": 0.01,
"grad_norm": 1.3191945552825928,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.92,
"step": 79
},
{
"epoch": 0.01,
"grad_norm": 1.2318845987319946,
"learning_rate": 1.6877637130801689e-06,
"loss": 0.854,
"step": 80
},
{
"epoch": 0.01,
"grad_norm": 1.2841240167617798,
"learning_rate": 1.708860759493671e-06,
"loss": 0.8995,
"step": 81
},
{
"epoch": 0.01,
"grad_norm": 1.2077780961990356,
"learning_rate": 1.729957805907173e-06,
"loss": 0.8117,
"step": 82
},
{
"epoch": 0.01,
"grad_norm": 1.1782020330429077,
"learning_rate": 1.7510548523206752e-06,
"loss": 0.8833,
"step": 83
},
{
"epoch": 0.01,
"grad_norm": 1.2451756000518799,
"learning_rate": 1.7721518987341774e-06,
"loss": 0.7938,
"step": 84
},
{
"epoch": 0.01,
"grad_norm": 1.2025009393692017,
"learning_rate": 1.7932489451476796e-06,
"loss": 0.8146,
"step": 85
},
{
"epoch": 0.01,
"grad_norm": 1.2381807565689087,
"learning_rate": 1.8143459915611816e-06,
"loss": 0.8662,
"step": 86
},
{
"epoch": 0.01,
"grad_norm": 1.2321275472640991,
"learning_rate": 1.8354430379746838e-06,
"loss": 0.8094,
"step": 87
},
{
"epoch": 0.01,
"grad_norm": 1.1794078350067139,
"learning_rate": 1.856540084388186e-06,
"loss": 0.8173,
"step": 88
},
{
"epoch": 0.01,
"grad_norm": 1.293404459953308,
"learning_rate": 1.877637130801688e-06,
"loss": 0.8645,
"step": 89
},
{
"epoch": 0.01,
"grad_norm": 1.269100308418274,
"learning_rate": 1.8987341772151901e-06,
"loss": 0.8404,
"step": 90
},
{
"epoch": 0.01,
"grad_norm": 1.262161374092102,
"learning_rate": 1.919831223628692e-06,
"loss": 0.8175,
"step": 91
},
{
"epoch": 0.01,
"grad_norm": 1.1058001518249512,
"learning_rate": 1.9409282700421943e-06,
"loss": 0.8211,
"step": 92
},
{
"epoch": 0.01,
"grad_norm": 1.223036289215088,
"learning_rate": 1.9620253164556965e-06,
"loss": 0.834,
"step": 93
},
{
"epoch": 0.01,
"grad_norm": 1.1103086471557617,
"learning_rate": 1.9831223628691982e-06,
"loss": 0.764,
"step": 94
},
{
"epoch": 0.01,
"grad_norm": 1.1673771142959595,
"learning_rate": 2.0042194092827004e-06,
"loss": 0.8599,
"step": 95
},
{
"epoch": 0.01,
"grad_norm": 1.1508725881576538,
"learning_rate": 2.0253164556962026e-06,
"loss": 0.8131,
"step": 96
},
{
"epoch": 0.01,
"grad_norm": 1.1243406534194946,
"learning_rate": 2.046413502109705e-06,
"loss": 0.695,
"step": 97
},
{
"epoch": 0.01,
"grad_norm": 1.088311791419983,
"learning_rate": 2.067510548523207e-06,
"loss": 0.7975,
"step": 98
},
{
"epoch": 0.01,
"grad_norm": 1.161037564277649,
"learning_rate": 2.088607594936709e-06,
"loss": 0.76,
"step": 99
},
{
"epoch": 0.01,
"grad_norm": 1.1575514078140259,
"learning_rate": 2.1097046413502114e-06,
"loss": 0.8673,
"step": 100
},
{
"epoch": 0.01,
"grad_norm": 1.1434293985366821,
"learning_rate": 2.130801687763713e-06,
"loss": 0.7514,
"step": 101
},
{
"epoch": 0.01,
"grad_norm": 1.1016374826431274,
"learning_rate": 2.1518987341772153e-06,
"loss": 0.812,
"step": 102
},
{
"epoch": 0.01,
"grad_norm": 1.1565126180648804,
"learning_rate": 2.1729957805907175e-06,
"loss": 0.8014,
"step": 103
},
{
"epoch": 0.01,
"grad_norm": 1.1164343357086182,
"learning_rate": 2.1940928270042197e-06,
"loss": 0.7952,
"step": 104
},
{
"epoch": 0.01,
"grad_norm": 1.1574078798294067,
"learning_rate": 2.2151898734177215e-06,
"loss": 0.8017,
"step": 105
},
{
"epoch": 0.01,
"grad_norm": 1.1797399520874023,
"learning_rate": 2.2362869198312237e-06,
"loss": 0.7733,
"step": 106
},
{
"epoch": 0.01,
"grad_norm": 1.1039958000183105,
"learning_rate": 2.257383966244726e-06,
"loss": 0.77,
"step": 107
},
{
"epoch": 0.01,
"grad_norm": 1.1885851621627808,
"learning_rate": 2.278481012658228e-06,
"loss": 0.8172,
"step": 108
},
{
"epoch": 0.01,
"grad_norm": 1.1390385627746582,
"learning_rate": 2.2995780590717302e-06,
"loss": 0.7927,
"step": 109
},
{
"epoch": 0.01,
"grad_norm": 1.1092506647109985,
"learning_rate": 2.3206751054852324e-06,
"loss": 0.7741,
"step": 110
},
{
"epoch": 0.01,
"grad_norm": 1.2666289806365967,
"learning_rate": 2.341772151898734e-06,
"loss": 0.7964,
"step": 111
},
{
"epoch": 0.01,
"grad_norm": 1.0005278587341309,
"learning_rate": 2.3628691983122364e-06,
"loss": 0.7241,
"step": 112
},
{
"epoch": 0.01,
"grad_norm": 1.1468347311019897,
"learning_rate": 2.3839662447257386e-06,
"loss": 0.7699,
"step": 113
},
{
"epoch": 0.01,
"grad_norm": 1.0486509799957275,
"learning_rate": 2.4050632911392408e-06,
"loss": 0.7698,
"step": 114
},
{
"epoch": 0.01,
"grad_norm": 1.0754250288009644,
"learning_rate": 2.426160337552743e-06,
"loss": 0.7577,
"step": 115
},
{
"epoch": 0.01,
"grad_norm": 1.0635970830917358,
"learning_rate": 2.447257383966245e-06,
"loss": 0.7201,
"step": 116
},
{
"epoch": 0.01,
"grad_norm": 1.1205726861953735,
"learning_rate": 2.4683544303797473e-06,
"loss": 0.8335,
"step": 117
},
{
"epoch": 0.01,
"grad_norm": 1.070901870727539,
"learning_rate": 2.489451476793249e-06,
"loss": 0.7949,
"step": 118
},
{
"epoch": 0.01,
"grad_norm": 1.0840576887130737,
"learning_rate": 2.5105485232067513e-06,
"loss": 0.7587,
"step": 119
},
{
"epoch": 0.01,
"grad_norm": 1.1758044958114624,
"learning_rate": 2.5316455696202535e-06,
"loss": 0.8069,
"step": 120
},
{
"epoch": 0.01,
"grad_norm": 1.0169506072998047,
"learning_rate": 2.5527426160337553e-06,
"loss": 0.7996,
"step": 121
},
{
"epoch": 0.01,
"grad_norm": 1.0591812133789062,
"learning_rate": 2.573839662447258e-06,
"loss": 0.7231,
"step": 122
},
{
"epoch": 0.01,
"grad_norm": 1.1686660051345825,
"learning_rate": 2.5949367088607596e-06,
"loss": 0.7476,
"step": 123
},
{
"epoch": 0.01,
"grad_norm": 1.0610604286193848,
"learning_rate": 2.6160337552742622e-06,
"loss": 0.7395,
"step": 124
},
{
"epoch": 0.01,
"grad_norm": 1.0829620361328125,
"learning_rate": 2.637130801687764e-06,
"loss": 0.8376,
"step": 125
},
{
"epoch": 0.01,
"grad_norm": 1.088221788406372,
"learning_rate": 2.6582278481012658e-06,
"loss": 0.7102,
"step": 126
},
{
"epoch": 0.01,
"grad_norm": 1.0730963945388794,
"learning_rate": 2.679324894514768e-06,
"loss": 0.7688,
"step": 127
},
{
"epoch": 0.01,
"grad_norm": 1.0567976236343384,
"learning_rate": 2.70042194092827e-06,
"loss": 0.7138,
"step": 128
},
{
"epoch": 0.01,
"grad_norm": 1.079959511756897,
"learning_rate": 2.7215189873417724e-06,
"loss": 0.7664,
"step": 129
},
{
"epoch": 0.01,
"grad_norm": 1.0153189897537231,
"learning_rate": 2.742616033755274e-06,
"loss": 0.7066,
"step": 130
},
{
"epoch": 0.01,
"grad_norm": 1.1893813610076904,
"learning_rate": 2.7637130801687767e-06,
"loss": 0.6818,
"step": 131
},
{
"epoch": 0.01,
"grad_norm": 1.1453039646148682,
"learning_rate": 2.7848101265822785e-06,
"loss": 0.8184,
"step": 132
},
{
"epoch": 0.01,
"grad_norm": 1.1458662748336792,
"learning_rate": 2.805907172995781e-06,
"loss": 0.7722,
"step": 133
},
{
"epoch": 0.01,
"grad_norm": 1.098507046699524,
"learning_rate": 2.827004219409283e-06,
"loss": 0.7963,
"step": 134
},
{
"epoch": 0.01,
"grad_norm": 1.0800371170043945,
"learning_rate": 2.848101265822785e-06,
"loss": 0.7168,
"step": 135
},
{
"epoch": 0.01,
"grad_norm": 1.0287774801254272,
"learning_rate": 2.8691983122362873e-06,
"loss": 0.7627,
"step": 136
},
{
"epoch": 0.01,
"grad_norm": 1.0763013362884521,
"learning_rate": 2.8902953586497895e-06,
"loss": 0.8135,
"step": 137
},
{
"epoch": 0.01,
"grad_norm": 1.2011860609054565,
"learning_rate": 2.9113924050632912e-06,
"loss": 0.7457,
"step": 138
},
{
"epoch": 0.01,
"grad_norm": 1.1453096866607666,
"learning_rate": 2.932489451476794e-06,
"loss": 0.7488,
"step": 139
},
{
"epoch": 0.01,
"grad_norm": 1.077102541923523,
"learning_rate": 2.9535864978902956e-06,
"loss": 0.7586,
"step": 140
},
{
"epoch": 0.01,
"grad_norm": 1.0777740478515625,
"learning_rate": 2.9746835443037974e-06,
"loss": 0.7453,
"step": 141
},
{
"epoch": 0.01,
"grad_norm": 1.0961055755615234,
"learning_rate": 2.9957805907173e-06,
"loss": 0.7509,
"step": 142
},
{
"epoch": 0.01,
"grad_norm": 1.0943870544433594,
"learning_rate": 3.0168776371308017e-06,
"loss": 0.7719,
"step": 143
},
{
"epoch": 0.01,
"grad_norm": 0.9995068907737732,
"learning_rate": 3.037974683544304e-06,
"loss": 0.7562,
"step": 144
},
{
"epoch": 0.01,
"grad_norm": 1.0800971984863281,
"learning_rate": 3.059071729957806e-06,
"loss": 0.8042,
"step": 145
},
{
"epoch": 0.01,
"grad_norm": 1.1074168682098389,
"learning_rate": 3.0801687763713083e-06,
"loss": 0.7368,
"step": 146
},
{
"epoch": 0.01,
"grad_norm": 1.1656383275985718,
"learning_rate": 3.10126582278481e-06,
"loss": 0.7654,
"step": 147
},
{
"epoch": 0.01,
"grad_norm": 1.1436306238174438,
"learning_rate": 3.1223628691983127e-06,
"loss": 0.7259,
"step": 148
},
{
"epoch": 0.01,
"grad_norm": 1.070346474647522,
"learning_rate": 3.1434599156118145e-06,
"loss": 0.7054,
"step": 149
},
{
"epoch": 0.01,
"grad_norm": 1.1207927465438843,
"learning_rate": 3.164556962025317e-06,
"loss": 0.7837,
"step": 150
},
{
"epoch": 0.01,
"grad_norm": 1.1490936279296875,
"learning_rate": 3.185654008438819e-06,
"loss": 0.7669,
"step": 151
},
{
"epoch": 0.01,
"grad_norm": 1.1072289943695068,
"learning_rate": 3.206751054852321e-06,
"loss": 0.7523,
"step": 152
},
{
"epoch": 0.01,
"grad_norm": 1.0876787900924683,
"learning_rate": 3.2278481012658232e-06,
"loss": 0.7732,
"step": 153
},
{
"epoch": 0.01,
"grad_norm": 1.1508756875991821,
"learning_rate": 3.248945147679325e-06,
"loss": 0.7658,
"step": 154
},
{
"epoch": 0.01,
"grad_norm": 1.2632142305374146,
"learning_rate": 3.270042194092827e-06,
"loss": 0.7805,
"step": 155
},
{
"epoch": 0.01,
"grad_norm": 1.0064738988876343,
"learning_rate": 3.2911392405063294e-06,
"loss": 0.7025,
"step": 156
},
{
"epoch": 0.01,
"grad_norm": 1.0723457336425781,
"learning_rate": 3.3122362869198316e-06,
"loss": 0.8056,
"step": 157
},
{
"epoch": 0.01,
"grad_norm": 1.0933613777160645,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.7526,
"step": 158
},
{
"epoch": 0.01,
"grad_norm": 1.164417028427124,
"learning_rate": 3.354430379746836e-06,
"loss": 0.7202,
"step": 159
},
{
"epoch": 0.01,
"grad_norm": 1.1357107162475586,
"learning_rate": 3.3755274261603377e-06,
"loss": 0.6892,
"step": 160
},
{
"epoch": 0.01,
"grad_norm": 1.1236298084259033,
"learning_rate": 3.39662447257384e-06,
"loss": 0.8033,
"step": 161
},
{
"epoch": 0.01,
"grad_norm": 1.0229138135910034,
"learning_rate": 3.417721518987342e-06,
"loss": 0.7655,
"step": 162
},
{
"epoch": 0.01,
"grad_norm": 1.1775723695755005,
"learning_rate": 3.4388185654008443e-06,
"loss": 0.7653,
"step": 163
},
{
"epoch": 0.01,
"grad_norm": 1.0776041746139526,
"learning_rate": 3.459915611814346e-06,
"loss": 0.7152,
"step": 164
},
{
"epoch": 0.01,
"grad_norm": 1.0283170938491821,
"learning_rate": 3.4810126582278487e-06,
"loss": 0.7518,
"step": 165
},
{
"epoch": 0.01,
"grad_norm": 1.0690875053405762,
"learning_rate": 3.5021097046413504e-06,
"loss": 0.8014,
"step": 166
},
{
"epoch": 0.01,
"grad_norm": 1.068129301071167,
"learning_rate": 3.523206751054853e-06,
"loss": 0.7015,
"step": 167
},
{
"epoch": 0.01,
"grad_norm": 1.0456384420394897,
"learning_rate": 3.544303797468355e-06,
"loss": 0.7503,
"step": 168
},
{
"epoch": 0.01,
"grad_norm": 1.148189663887024,
"learning_rate": 3.5654008438818566e-06,
"loss": 0.7158,
"step": 169
},
{
"epoch": 0.01,
"grad_norm": 1.050423264503479,
"learning_rate": 3.586497890295359e-06,
"loss": 0.7687,
"step": 170
},
{
"epoch": 0.01,
"grad_norm": 1.0767706632614136,
"learning_rate": 3.607594936708861e-06,
"loss": 0.7401,
"step": 171
},
{
"epoch": 0.01,
"grad_norm": 1.0276440382003784,
"learning_rate": 3.628691983122363e-06,
"loss": 0.798,
"step": 172
},
{
"epoch": 0.01,
"grad_norm": 1.1560982465744019,
"learning_rate": 3.649789029535865e-06,
"loss": 0.7576,
"step": 173
},
{
"epoch": 0.01,
"grad_norm": 1.0696228742599487,
"learning_rate": 3.6708860759493675e-06,
"loss": 0.7687,
"step": 174
},
{
"epoch": 0.01,
"grad_norm": 1.0968499183654785,
"learning_rate": 3.6919831223628693e-06,
"loss": 0.7254,
"step": 175
},
{
"epoch": 0.01,
"grad_norm": 1.0895642042160034,
"learning_rate": 3.713080168776372e-06,
"loss": 0.7755,
"step": 176
},
{
"epoch": 0.01,
"grad_norm": 1.0401157140731812,
"learning_rate": 3.7341772151898737e-06,
"loss": 0.729,
"step": 177
},
{
"epoch": 0.01,
"grad_norm": 1.1051784753799438,
"learning_rate": 3.755274261603376e-06,
"loss": 0.7117,
"step": 178
},
{
"epoch": 0.01,
"grad_norm": 1.12349271774292,
"learning_rate": 3.776371308016878e-06,
"loss": 0.7652,
"step": 179
},
{
"epoch": 0.01,
"grad_norm": 1.0855416059494019,
"learning_rate": 3.7974683544303802e-06,
"loss": 0.6883,
"step": 180
},
{
"epoch": 0.01,
"grad_norm": 1.1745630502700806,
"learning_rate": 3.818565400843882e-06,
"loss": 0.7449,
"step": 181
},
{
"epoch": 0.01,
"grad_norm": 1.0661110877990723,
"learning_rate": 3.839662447257384e-06,
"loss": 0.7232,
"step": 182
},
{
"epoch": 0.01,
"grad_norm": 1.0649524927139282,
"learning_rate": 3.860759493670886e-06,
"loss": 0.7632,
"step": 183
},
{
"epoch": 0.01,
"grad_norm": 1.128460168838501,
"learning_rate": 3.8818565400843886e-06,
"loss": 0.7516,
"step": 184
},
{
"epoch": 0.01,
"grad_norm": 1.1216446161270142,
"learning_rate": 3.902953586497891e-06,
"loss": 0.673,
"step": 185
},
{
"epoch": 0.01,
"grad_norm": 1.1039458513259888,
"learning_rate": 3.924050632911393e-06,
"loss": 0.7259,
"step": 186
},
{
"epoch": 0.01,
"grad_norm": 1.0785599946975708,
"learning_rate": 3.945147679324895e-06,
"loss": 0.7689,
"step": 187
},
{
"epoch": 0.01,
"grad_norm": 1.1407884359359741,
"learning_rate": 3.9662447257383965e-06,
"loss": 0.7762,
"step": 188
},
{
"epoch": 0.01,
"grad_norm": 1.2207587957382202,
"learning_rate": 3.9873417721518995e-06,
"loss": 0.7593,
"step": 189
},
{
"epoch": 0.01,
"grad_norm": 1.126558542251587,
"learning_rate": 4.008438818565401e-06,
"loss": 0.7983,
"step": 190
},
{
"epoch": 0.01,
"grad_norm": 1.0701903104782104,
"learning_rate": 4.029535864978903e-06,
"loss": 0.7166,
"step": 191
},
{
"epoch": 0.01,
"grad_norm": 1.0827971696853638,
"learning_rate": 4.050632911392405e-06,
"loss": 0.7339,
"step": 192
},
{
"epoch": 0.01,
"grad_norm": 0.9973072409629822,
"learning_rate": 4.0717299578059074e-06,
"loss": 0.6799,
"step": 193
},
{
"epoch": 0.01,
"grad_norm": 1.05364990234375,
"learning_rate": 4.09282700421941e-06,
"loss": 0.7106,
"step": 194
},
{
"epoch": 0.01,
"grad_norm": 1.0435612201690674,
"learning_rate": 4.113924050632912e-06,
"loss": 0.8139,
"step": 195
},
{
"epoch": 0.01,
"grad_norm": 1.0995101928710938,
"learning_rate": 4.135021097046414e-06,
"loss": 0.7309,
"step": 196
},
{
"epoch": 0.01,
"grad_norm": 1.0613794326782227,
"learning_rate": 4.156118143459915e-06,
"loss": 0.7698,
"step": 197
},
{
"epoch": 0.01,
"grad_norm": 1.1413551568984985,
"learning_rate": 4.177215189873418e-06,
"loss": 0.7912,
"step": 198
},
{
"epoch": 0.01,
"grad_norm": 1.0940371751785278,
"learning_rate": 4.19831223628692e-06,
"loss": 0.7316,
"step": 199
},
{
"epoch": 0.01,
"grad_norm": 1.1012215614318848,
"learning_rate": 4.219409282700423e-06,
"loss": 0.7178,
"step": 200
},
{
"epoch": 0.01,
"grad_norm": 1.1062922477722168,
"learning_rate": 4.240506329113924e-06,
"loss": 0.7409,
"step": 201
},
{
"epoch": 0.01,
"grad_norm": 1.1517467498779297,
"learning_rate": 4.261603375527426e-06,
"loss": 0.8392,
"step": 202
},
{
"epoch": 0.01,
"grad_norm": 0.9980434775352478,
"learning_rate": 4.2827004219409285e-06,
"loss": 0.7081,
"step": 203
},
{
"epoch": 0.01,
"grad_norm": 1.190421223640442,
"learning_rate": 4.303797468354431e-06,
"loss": 0.7573,
"step": 204
},
{
"epoch": 0.01,
"grad_norm": 1.1169999837875366,
"learning_rate": 4.324894514767933e-06,
"loss": 0.7444,
"step": 205
},
{
"epoch": 0.01,
"grad_norm": 1.1159812211990356,
"learning_rate": 4.345991561181435e-06,
"loss": 0.7336,
"step": 206
},
{
"epoch": 0.01,
"grad_norm": 1.016315221786499,
"learning_rate": 4.367088607594937e-06,
"loss": 0.7329,
"step": 207
},
{
"epoch": 0.01,
"grad_norm": 1.1287728548049927,
"learning_rate": 4.3881856540084394e-06,
"loss": 0.7638,
"step": 208
},
{
"epoch": 0.01,
"grad_norm": 1.0038844347000122,
"learning_rate": 4.409282700421942e-06,
"loss": 0.6823,
"step": 209
},
{
"epoch": 0.01,
"grad_norm": 1.0368938446044922,
"learning_rate": 4.430379746835443e-06,
"loss": 0.7563,
"step": 210
},
{
"epoch": 0.01,
"grad_norm": 1.215239405632019,
"learning_rate": 4.451476793248945e-06,
"loss": 0.7747,
"step": 211
},
{
"epoch": 0.01,
"grad_norm": 0.9736345410346985,
"learning_rate": 4.472573839662447e-06,
"loss": 0.6811,
"step": 212
},
{
"epoch": 0.01,
"grad_norm": 1.133034110069275,
"learning_rate": 4.4936708860759495e-06,
"loss": 0.8181,
"step": 213
},
{
"epoch": 0.01,
"grad_norm": 1.1740864515304565,
"learning_rate": 4.514767932489452e-06,
"loss": 0.7113,
"step": 214
},
{
"epoch": 0.01,
"grad_norm": 1.0824915170669556,
"learning_rate": 4.535864978902954e-06,
"loss": 0.7522,
"step": 215
},
{
"epoch": 0.01,
"grad_norm": 1.0215483903884888,
"learning_rate": 4.556962025316456e-06,
"loss": 0.7099,
"step": 216
},
{
"epoch": 0.01,
"grad_norm": 1.1021614074707031,
"learning_rate": 4.578059071729958e-06,
"loss": 0.6993,
"step": 217
},
{
"epoch": 0.01,
"grad_norm": 1.0713529586791992,
"learning_rate": 4.5991561181434605e-06,
"loss": 0.7765,
"step": 218
},
{
"epoch": 0.01,
"grad_norm": 1.0062373876571655,
"learning_rate": 4.620253164556963e-06,
"loss": 0.7104,
"step": 219
},
{
"epoch": 0.01,
"grad_norm": 1.0480597019195557,
"learning_rate": 4.641350210970465e-06,
"loss": 0.6808,
"step": 220
},
{
"epoch": 0.01,
"grad_norm": 1.0954842567443848,
"learning_rate": 4.662447257383967e-06,
"loss": 0.7508,
"step": 221
},
{
"epoch": 0.01,
"grad_norm": 1.1099358797073364,
"learning_rate": 4.683544303797468e-06,
"loss": 0.7075,
"step": 222
},
{
"epoch": 0.01,
"grad_norm": 1.0071532726287842,
"learning_rate": 4.7046413502109714e-06,
"loss": 0.6977,
"step": 223
},
{
"epoch": 0.01,
"grad_norm": 1.2051150798797607,
"learning_rate": 4.725738396624473e-06,
"loss": 0.7822,
"step": 224
},
{
"epoch": 0.01,
"grad_norm": 1.0721451044082642,
"learning_rate": 4.746835443037975e-06,
"loss": 0.7405,
"step": 225
},
{
"epoch": 0.01,
"grad_norm": 1.093891978263855,
"learning_rate": 4.767932489451477e-06,
"loss": 0.696,
"step": 226
},
{
"epoch": 0.01,
"grad_norm": 1.030121088027954,
"learning_rate": 4.789029535864979e-06,
"loss": 0.6639,
"step": 227
},
{
"epoch": 0.01,
"grad_norm": 1.029396653175354,
"learning_rate": 4.8101265822784815e-06,
"loss": 0.6721,
"step": 228
},
{
"epoch": 0.01,
"grad_norm": 1.075605034828186,
"learning_rate": 4.831223628691984e-06,
"loss": 0.767,
"step": 229
},
{
"epoch": 0.01,
"grad_norm": 1.1397629976272583,
"learning_rate": 4.852320675105486e-06,
"loss": 0.7567,
"step": 230
},
{
"epoch": 0.01,
"grad_norm": 0.9947602152824402,
"learning_rate": 4.873417721518987e-06,
"loss": 0.6579,
"step": 231
},
{
"epoch": 0.01,
"grad_norm": 1.1511805057525635,
"learning_rate": 4.89451476793249e-06,
"loss": 0.775,
"step": 232
},
{
"epoch": 0.01,
"grad_norm": 1.1157087087631226,
"learning_rate": 4.915611814345992e-06,
"loss": 0.7197,
"step": 233
},
{
"epoch": 0.01,
"grad_norm": 1.041972041130066,
"learning_rate": 4.936708860759495e-06,
"loss": 0.6443,
"step": 234
},
{
"epoch": 0.01,
"grad_norm": 1.1057621240615845,
"learning_rate": 4.957805907172996e-06,
"loss": 0.7262,
"step": 235
},
{
"epoch": 0.01,
"grad_norm": 1.0165050029754639,
"learning_rate": 4.978902953586498e-06,
"loss": 0.6992,
"step": 236
},
{
"epoch": 0.02,
"grad_norm": 1.093250036239624,
"learning_rate": 5e-06,
"loss": 0.7268,
"step": 237
},
{
"epoch": 0.02,
"grad_norm": 1.0492795705795288,
"learning_rate": 5.021097046413503e-06,
"loss": 0.7542,
"step": 238
},
{
"epoch": 0.02,
"grad_norm": 1.102603793144226,
"learning_rate": 5.042194092827004e-06,
"loss": 0.7308,
"step": 239
},
{
"epoch": 0.02,
"grad_norm": 1.078536033630371,
"learning_rate": 5.063291139240507e-06,
"loss": 0.6885,
"step": 240
},
{
"epoch": 0.02,
"grad_norm": 1.0672438144683838,
"learning_rate": 5.084388185654009e-06,
"loss": 0.7401,
"step": 241
},
{
"epoch": 0.02,
"grad_norm": 1.0409996509552002,
"learning_rate": 5.1054852320675105e-06,
"loss": 0.6583,
"step": 242
},
{
"epoch": 0.02,
"grad_norm": 1.0213676691055298,
"learning_rate": 5.126582278481013e-06,
"loss": 0.6894,
"step": 243
},
{
"epoch": 0.02,
"grad_norm": 1.00473952293396,
"learning_rate": 5.147679324894516e-06,
"loss": 0.6331,
"step": 244
},
{
"epoch": 0.02,
"grad_norm": 1.0353198051452637,
"learning_rate": 5.168776371308017e-06,
"loss": 0.6998,
"step": 245
},
{
"epoch": 0.02,
"grad_norm": 1.0606788396835327,
"learning_rate": 5.189873417721519e-06,
"loss": 0.7192,
"step": 246
},
{
"epoch": 0.02,
"grad_norm": 1.0762742757797241,
"learning_rate": 5.2109704641350215e-06,
"loss": 0.7234,
"step": 247
},
{
"epoch": 0.02,
"grad_norm": 1.0901657342910767,
"learning_rate": 5.2320675105485245e-06,
"loss": 0.7279,
"step": 248
},
{
"epoch": 0.02,
"grad_norm": 1.0092108249664307,
"learning_rate": 5.253164556962026e-06,
"loss": 0.7626,
"step": 249
},
{
"epoch": 0.02,
"grad_norm": 1.0560524463653564,
"learning_rate": 5.274261603375528e-06,
"loss": 0.6912,
"step": 250
},
{
"epoch": 0.02,
"grad_norm": 1.0830129384994507,
"learning_rate": 5.295358649789029e-06,
"loss": 0.7379,
"step": 251
},
{
"epoch": 0.02,
"grad_norm": 1.0292439460754395,
"learning_rate": 5.3164556962025316e-06,
"loss": 0.7024,
"step": 252
},
{
"epoch": 0.02,
"grad_norm": 1.0570290088653564,
"learning_rate": 5.337552742616035e-06,
"loss": 0.7485,
"step": 253
},
{
"epoch": 0.02,
"grad_norm": 1.098766565322876,
"learning_rate": 5.358649789029536e-06,
"loss": 0.7215,
"step": 254
},
{
"epoch": 0.02,
"grad_norm": 1.038459062576294,
"learning_rate": 5.379746835443038e-06,
"loss": 0.7304,
"step": 255
},
{
"epoch": 0.02,
"grad_norm": 1.0401676893234253,
"learning_rate": 5.40084388185654e-06,
"loss": 0.711,
"step": 256
},
{
"epoch": 0.02,
"grad_norm": 1.1514108180999756,
"learning_rate": 5.421940928270043e-06,
"loss": 0.7406,
"step": 257
},
{
"epoch": 0.02,
"grad_norm": 1.0562361478805542,
"learning_rate": 5.443037974683545e-06,
"loss": 0.6916,
"step": 258
},
{
"epoch": 0.02,
"grad_norm": 1.0437403917312622,
"learning_rate": 5.464135021097047e-06,
"loss": 0.7645,
"step": 259
},
{
"epoch": 0.02,
"grad_norm": 1.0529121160507202,
"learning_rate": 5.485232067510548e-06,
"loss": 0.7039,
"step": 260
},
{
"epoch": 0.02,
"grad_norm": 0.9928483963012695,
"learning_rate": 5.506329113924051e-06,
"loss": 0.7255,
"step": 261
},
{
"epoch": 0.02,
"grad_norm": 1.149880290031433,
"learning_rate": 5.5274261603375535e-06,
"loss": 0.6802,
"step": 262
},
{
"epoch": 0.02,
"grad_norm": 1.0575579404830933,
"learning_rate": 5.548523206751056e-06,
"loss": 0.6778,
"step": 263
},
{
"epoch": 0.02,
"grad_norm": 1.055155634880066,
"learning_rate": 5.569620253164557e-06,
"loss": 0.7686,
"step": 264
},
{
"epoch": 0.02,
"grad_norm": 1.0442339181900024,
"learning_rate": 5.590717299578059e-06,
"loss": 0.7301,
"step": 265
},
{
"epoch": 0.02,
"grad_norm": 1.128800868988037,
"learning_rate": 5.611814345991562e-06,
"loss": 0.7405,
"step": 266
},
{
"epoch": 0.02,
"grad_norm": 1.045422077178955,
"learning_rate": 5.6329113924050636e-06,
"loss": 0.7035,
"step": 267
},
{
"epoch": 0.02,
"grad_norm": 1.1714004278182983,
"learning_rate": 5.654008438818566e-06,
"loss": 0.6549,
"step": 268
},
{
"epoch": 0.02,
"grad_norm": 1.024259328842163,
"learning_rate": 5.675105485232067e-06,
"loss": 0.7305,
"step": 269
},
{
"epoch": 0.02,
"grad_norm": 0.9970294237136841,
"learning_rate": 5.69620253164557e-06,
"loss": 0.7004,
"step": 270
},
{
"epoch": 0.02,
"grad_norm": 1.1413666009902954,
"learning_rate": 5.717299578059072e-06,
"loss": 0.7865,
"step": 271
},
{
"epoch": 0.02,
"grad_norm": 1.0278419256210327,
"learning_rate": 5.7383966244725745e-06,
"loss": 0.7116,
"step": 272
},
{
"epoch": 0.02,
"grad_norm": 1.1053383350372314,
"learning_rate": 5.759493670886076e-06,
"loss": 0.7703,
"step": 273
},
{
"epoch": 0.02,
"grad_norm": 1.0635850429534912,
"learning_rate": 5.780590717299579e-06,
"loss": 0.7128,
"step": 274
},
{
"epoch": 0.02,
"grad_norm": 1.2253458499908447,
"learning_rate": 5.801687763713081e-06,
"loss": 0.7689,
"step": 275
},
{
"epoch": 0.02,
"grad_norm": 1.152657151222229,
"learning_rate": 5.8227848101265824e-06,
"loss": 0.7243,
"step": 276
},
{
"epoch": 0.02,
"grad_norm": 1.0314563512802124,
"learning_rate": 5.843881856540085e-06,
"loss": 0.702,
"step": 277
},
{
"epoch": 0.02,
"grad_norm": 0.9639416933059692,
"learning_rate": 5.864978902953588e-06,
"loss": 0.6504,
"step": 278
},
{
"epoch": 0.02,
"grad_norm": 1.0700966119766235,
"learning_rate": 5.886075949367089e-06,
"loss": 0.7275,
"step": 279
},
{
"epoch": 0.02,
"grad_norm": 1.0939685106277466,
"learning_rate": 5.907172995780591e-06,
"loss": 0.7193,
"step": 280
},
{
"epoch": 0.02,
"grad_norm": 1.0693538188934326,
"learning_rate": 5.928270042194093e-06,
"loss": 0.7746,
"step": 281
},
{
"epoch": 0.02,
"grad_norm": 1.057306170463562,
"learning_rate": 5.949367088607595e-06,
"loss": 0.7504,
"step": 282
},
{
"epoch": 0.02,
"grad_norm": 1.1040732860565186,
"learning_rate": 5.970464135021098e-06,
"loss": 0.7458,
"step": 283
},
{
"epoch": 0.02,
"grad_norm": 1.031706690788269,
"learning_rate": 5.9915611814346e-06,
"loss": 0.7131,
"step": 284
},
{
"epoch": 0.02,
"grad_norm": 1.0323573350906372,
"learning_rate": 6.012658227848101e-06,
"loss": 0.6868,
"step": 285
},
{
"epoch": 0.02,
"grad_norm": 1.0617097616195679,
"learning_rate": 6.0337552742616035e-06,
"loss": 0.63,
"step": 286
},
{
"epoch": 0.02,
"grad_norm": 1.11605703830719,
"learning_rate": 6.0548523206751065e-06,
"loss": 0.6858,
"step": 287
},
{
"epoch": 0.02,
"grad_norm": 1.0047672986984253,
"learning_rate": 6.075949367088608e-06,
"loss": 0.6639,
"step": 288
},
{
"epoch": 0.02,
"grad_norm": 1.131504774093628,
"learning_rate": 6.09704641350211e-06,
"loss": 0.7063,
"step": 289
},
{
"epoch": 0.02,
"grad_norm": 1.063057780265808,
"learning_rate": 6.118143459915612e-06,
"loss": 0.741,
"step": 290
},
{
"epoch": 0.02,
"grad_norm": 1.0435850620269775,
"learning_rate": 6.139240506329115e-06,
"loss": 0.6867,
"step": 291
},
{
"epoch": 0.02,
"grad_norm": 1.102751612663269,
"learning_rate": 6.160337552742617e-06,
"loss": 0.7544,
"step": 292
},
{
"epoch": 0.02,
"grad_norm": 1.1484432220458984,
"learning_rate": 6.181434599156119e-06,
"loss": 0.7241,
"step": 293
},
{
"epoch": 0.02,
"grad_norm": 1.1322616338729858,
"learning_rate": 6.20253164556962e-06,
"loss": 0.7311,
"step": 294
},
{
"epoch": 0.02,
"grad_norm": 1.077012300491333,
"learning_rate": 6.223628691983122e-06,
"loss": 0.7061,
"step": 295
},
{
"epoch": 0.02,
"grad_norm": 1.0520391464233398,
"learning_rate": 6.244725738396625e-06,
"loss": 0.7375,
"step": 296
},
{
"epoch": 0.02,
"grad_norm": 1.0452332496643066,
"learning_rate": 6.265822784810128e-06,
"loss": 0.6791,
"step": 297
},
{
"epoch": 0.02,
"grad_norm": 1.037235975265503,
"learning_rate": 6.286919831223629e-06,
"loss": 0.7208,
"step": 298
},
{
"epoch": 0.02,
"grad_norm": 1.0935461521148682,
"learning_rate": 6.308016877637131e-06,
"loss": 0.679,
"step": 299
},
{
"epoch": 0.02,
"grad_norm": 1.1446540355682373,
"learning_rate": 6.329113924050634e-06,
"loss": 0.7607,
"step": 300
},
{
"epoch": 0.02,
"grad_norm": 1.0081703662872314,
"learning_rate": 6.3502109704641355e-06,
"loss": 0.6801,
"step": 301
},
{
"epoch": 0.02,
"grad_norm": 1.042680025100708,
"learning_rate": 6.371308016877638e-06,
"loss": 0.709,
"step": 302
},
{
"epoch": 0.02,
"grad_norm": 0.9972267150878906,
"learning_rate": 6.392405063291139e-06,
"loss": 0.7027,
"step": 303
},
{
"epoch": 0.02,
"grad_norm": 1.1144083738327026,
"learning_rate": 6.413502109704642e-06,
"loss": 0.7594,
"step": 304
},
{
"epoch": 0.02,
"grad_norm": 1.0191450119018555,
"learning_rate": 6.434599156118144e-06,
"loss": 0.6988,
"step": 305
},
{
"epoch": 0.02,
"grad_norm": 1.0765477418899536,
"learning_rate": 6.4556962025316464e-06,
"loss": 0.6887,
"step": 306
},
{
"epoch": 0.02,
"grad_norm": 1.0328569412231445,
"learning_rate": 6.476793248945148e-06,
"loss": 0.6941,
"step": 307
},
{
"epoch": 0.02,
"grad_norm": 1.060673475265503,
"learning_rate": 6.49789029535865e-06,
"loss": 0.7643,
"step": 308
},
{
"epoch": 0.02,
"grad_norm": 1.0403449535369873,
"learning_rate": 6.518987341772153e-06,
"loss": 0.7178,
"step": 309
},
{
"epoch": 0.02,
"grad_norm": 1.000282883644104,
"learning_rate": 6.540084388185654e-06,
"loss": 0.6678,
"step": 310
},
{
"epoch": 0.02,
"grad_norm": 0.988453209400177,
"learning_rate": 6.5611814345991565e-06,
"loss": 0.6962,
"step": 311
},
{
"epoch": 0.02,
"grad_norm": 1.0525801181793213,
"learning_rate": 6.582278481012659e-06,
"loss": 0.6947,
"step": 312
},
{
"epoch": 0.02,
"grad_norm": 1.1088162660598755,
"learning_rate": 6.603375527426161e-06,
"loss": 0.7701,
"step": 313
},
{
"epoch": 0.02,
"grad_norm": 1.0845990180969238,
"learning_rate": 6.624472573839663e-06,
"loss": 0.6622,
"step": 314
},
{
"epoch": 0.02,
"grad_norm": 1.1164056062698364,
"learning_rate": 6.645569620253165e-06,
"loss": 0.6881,
"step": 315
},
{
"epoch": 0.02,
"grad_norm": 1.112685203552246,
"learning_rate": 6.666666666666667e-06,
"loss": 0.6864,
"step": 316
},
{
"epoch": 0.02,
"grad_norm": 1.0409352779388428,
"learning_rate": 6.68776371308017e-06,
"loss": 0.6724,
"step": 317
},
{
"epoch": 0.02,
"grad_norm": 1.0470497608184814,
"learning_rate": 6.708860759493672e-06,
"loss": 0.7243,
"step": 318
},
{
"epoch": 0.02,
"grad_norm": 1.1536136865615845,
"learning_rate": 6.729957805907173e-06,
"loss": 0.7587,
"step": 319
},
{
"epoch": 0.02,
"grad_norm": 1.1283690929412842,
"learning_rate": 6.751054852320675e-06,
"loss": 0.7378,
"step": 320
},
{
"epoch": 0.02,
"grad_norm": 1.1128261089324951,
"learning_rate": 6.772151898734178e-06,
"loss": 0.7279,
"step": 321
},
{
"epoch": 0.02,
"grad_norm": 0.973404586315155,
"learning_rate": 6.79324894514768e-06,
"loss": 0.7067,
"step": 322
},
{
"epoch": 0.02,
"grad_norm": 1.0499427318572998,
"learning_rate": 6.814345991561182e-06,
"loss": 0.6869,
"step": 323
},
{
"epoch": 0.02,
"grad_norm": 1.047536849975586,
"learning_rate": 6.835443037974684e-06,
"loss": 0.6862,
"step": 324
},
{
"epoch": 0.02,
"grad_norm": 1.076842188835144,
"learning_rate": 6.8565400843881855e-06,
"loss": 0.6522,
"step": 325
},
{
"epoch": 0.02,
"grad_norm": 1.1328686475753784,
"learning_rate": 6.8776371308016885e-06,
"loss": 0.7842,
"step": 326
},
{
"epoch": 0.02,
"grad_norm": 1.05484938621521,
"learning_rate": 6.898734177215191e-06,
"loss": 0.7303,
"step": 327
},
{
"epoch": 0.02,
"grad_norm": 1.0914195775985718,
"learning_rate": 6.919831223628692e-06,
"loss": 0.7791,
"step": 328
},
{
"epoch": 0.02,
"grad_norm": 1.1371619701385498,
"learning_rate": 6.940928270042194e-06,
"loss": 0.7417,
"step": 329
},
{
"epoch": 0.02,
"grad_norm": 1.0946354866027832,
"learning_rate": 6.962025316455697e-06,
"loss": 0.7099,
"step": 330
},
{
"epoch": 0.02,
"grad_norm": 1.048621416091919,
"learning_rate": 6.9831223628691995e-06,
"loss": 0.7406,
"step": 331
},
{
"epoch": 0.02,
"grad_norm": 1.0043079853057861,
"learning_rate": 7.004219409282701e-06,
"loss": 0.6941,
"step": 332
},
{
"epoch": 0.02,
"grad_norm": 1.0117560625076294,
"learning_rate": 7.025316455696203e-06,
"loss": 0.7254,
"step": 333
},
{
"epoch": 0.02,
"grad_norm": 0.9732447862625122,
"learning_rate": 7.046413502109706e-06,
"loss": 0.6738,
"step": 334
},
{
"epoch": 0.02,
"grad_norm": 1.0345726013183594,
"learning_rate": 7.067510548523207e-06,
"loss": 0.7732,
"step": 335
},
{
"epoch": 0.02,
"grad_norm": 1.0557833909988403,
"learning_rate": 7.08860759493671e-06,
"loss": 0.6691,
"step": 336
},
{
"epoch": 0.02,
"grad_norm": 1.0420573949813843,
"learning_rate": 7.109704641350211e-06,
"loss": 0.6917,
"step": 337
},
{
"epoch": 0.02,
"grad_norm": 1.0846279859542847,
"learning_rate": 7.130801687763713e-06,
"loss": 0.7203,
"step": 338
},
{
"epoch": 0.02,
"grad_norm": 1.0644689798355103,
"learning_rate": 7.151898734177216e-06,
"loss": 0.7264,
"step": 339
},
{
"epoch": 0.02,
"grad_norm": 0.9768193960189819,
"learning_rate": 7.172995780590718e-06,
"loss": 0.635,
"step": 340
},
{
"epoch": 0.02,
"grad_norm": 1.0101126432418823,
"learning_rate": 7.19409282700422e-06,
"loss": 0.6507,
"step": 341
},
{
"epoch": 0.02,
"grad_norm": 1.208484411239624,
"learning_rate": 7.215189873417722e-06,
"loss": 0.7535,
"step": 342
},
{
"epoch": 0.02,
"grad_norm": 1.0944318771362305,
"learning_rate": 7.236286919831225e-06,
"loss": 0.7649,
"step": 343
},
{
"epoch": 0.02,
"grad_norm": 0.9345969557762146,
"learning_rate": 7.257383966244726e-06,
"loss": 0.6838,
"step": 344
},
{
"epoch": 0.02,
"grad_norm": 1.0136656761169434,
"learning_rate": 7.2784810126582285e-06,
"loss": 0.6863,
"step": 345
},
{
"epoch": 0.02,
"grad_norm": 1.0391440391540527,
"learning_rate": 7.29957805907173e-06,
"loss": 0.7484,
"step": 346
},
{
"epoch": 0.02,
"grad_norm": 1.0096555948257446,
"learning_rate": 7.320675105485233e-06,
"loss": 0.6641,
"step": 347
},
{
"epoch": 0.02,
"grad_norm": 1.0947874784469604,
"learning_rate": 7.341772151898735e-06,
"loss": 0.6892,
"step": 348
},
{
"epoch": 0.02,
"grad_norm": 1.0220927000045776,
"learning_rate": 7.362869198312237e-06,
"loss": 0.6493,
"step": 349
},
{
"epoch": 0.02,
"grad_norm": 1.0061572790145874,
"learning_rate": 7.3839662447257386e-06,
"loss": 0.6969,
"step": 350
},
{
"epoch": 0.02,
"grad_norm": 1.142774224281311,
"learning_rate": 7.405063291139241e-06,
"loss": 0.7036,
"step": 351
},
{
"epoch": 0.02,
"grad_norm": 1.1005827188491821,
"learning_rate": 7.426160337552744e-06,
"loss": 0.6813,
"step": 352
},
{
"epoch": 0.02,
"grad_norm": 0.9909974932670593,
"learning_rate": 7.447257383966245e-06,
"loss": 0.6848,
"step": 353
},
{
"epoch": 0.02,
"grad_norm": 1.0379297733306885,
"learning_rate": 7.468354430379747e-06,
"loss": 0.6472,
"step": 354
},
{
"epoch": 0.02,
"grad_norm": 1.0365216732025146,
"learning_rate": 7.4894514767932495e-06,
"loss": 0.7371,
"step": 355
},
{
"epoch": 0.02,
"grad_norm": 1.0643117427825928,
"learning_rate": 7.510548523206752e-06,
"loss": 0.7383,
"step": 356
},
{
"epoch": 0.02,
"grad_norm": 1.0581454038619995,
"learning_rate": 7.531645569620254e-06,
"loss": 0.6001,
"step": 357
},
{
"epoch": 0.02,
"grad_norm": 1.147464632987976,
"learning_rate": 7.552742616033756e-06,
"loss": 0.7362,
"step": 358
},
{
"epoch": 0.02,
"grad_norm": 1.1402760744094849,
"learning_rate": 7.5738396624472574e-06,
"loss": 0.6973,
"step": 359
},
{
"epoch": 0.02,
"grad_norm": 0.987370491027832,
"learning_rate": 7.5949367088607605e-06,
"loss": 0.6186,
"step": 360
},
{
"epoch": 0.02,
"grad_norm": 1.1057859659194946,
"learning_rate": 7.616033755274263e-06,
"loss": 0.758,
"step": 361
},
{
"epoch": 0.02,
"grad_norm": 1.1299041509628296,
"learning_rate": 7.637130801687764e-06,
"loss": 0.6524,
"step": 362
},
{
"epoch": 0.02,
"grad_norm": 1.0638086795806885,
"learning_rate": 7.658227848101265e-06,
"loss": 0.6748,
"step": 363
},
{
"epoch": 0.02,
"grad_norm": 1.1084306240081787,
"learning_rate": 7.679324894514768e-06,
"loss": 0.7294,
"step": 364
},
{
"epoch": 0.02,
"grad_norm": 1.0223629474639893,
"learning_rate": 7.700421940928271e-06,
"loss": 0.7385,
"step": 365
},
{
"epoch": 0.02,
"grad_norm": 0.9762625694274902,
"learning_rate": 7.721518987341773e-06,
"loss": 0.657,
"step": 366
},
{
"epoch": 0.02,
"grad_norm": 1.045155644416809,
"learning_rate": 7.742616033755274e-06,
"loss": 0.6697,
"step": 367
},
{
"epoch": 0.02,
"grad_norm": 1.101192831993103,
"learning_rate": 7.763713080168777e-06,
"loss": 0.7388,
"step": 368
},
{
"epoch": 0.02,
"grad_norm": 0.9734575152397156,
"learning_rate": 7.78481012658228e-06,
"loss": 0.7006,
"step": 369
},
{
"epoch": 0.02,
"grad_norm": 1.0786582231521606,
"learning_rate": 7.805907172995782e-06,
"loss": 0.7253,
"step": 370
},
{
"epoch": 0.02,
"grad_norm": 1.0659972429275513,
"learning_rate": 7.827004219409283e-06,
"loss": 0.6975,
"step": 371
},
{
"epoch": 0.02,
"grad_norm": 1.0065947771072388,
"learning_rate": 7.848101265822786e-06,
"loss": 0.6478,
"step": 372
},
{
"epoch": 0.02,
"grad_norm": 1.066577672958374,
"learning_rate": 7.869198312236287e-06,
"loss": 0.6615,
"step": 373
},
{
"epoch": 0.02,
"grad_norm": 0.9906947612762451,
"learning_rate": 7.89029535864979e-06,
"loss": 0.6473,
"step": 374
},
{
"epoch": 0.02,
"grad_norm": 1.1715826988220215,
"learning_rate": 7.911392405063292e-06,
"loss": 0.7237,
"step": 375
},
{
"epoch": 0.02,
"grad_norm": 0.943070650100708,
"learning_rate": 7.932489451476793e-06,
"loss": 0.6124,
"step": 376
},
{
"epoch": 0.02,
"grad_norm": 1.0601487159729004,
"learning_rate": 7.953586497890296e-06,
"loss": 0.7008,
"step": 377
},
{
"epoch": 0.02,
"grad_norm": 0.9950490593910217,
"learning_rate": 7.974683544303799e-06,
"loss": 0.676,
"step": 378
},
{
"epoch": 0.02,
"grad_norm": 1.107460856437683,
"learning_rate": 7.9957805907173e-06,
"loss": 0.6832,
"step": 379
},
{
"epoch": 0.02,
"grad_norm": 1.111391305923462,
"learning_rate": 8.016877637130802e-06,
"loss": 0.7273,
"step": 380
},
{
"epoch": 0.02,
"grad_norm": 0.9769160747528076,
"learning_rate": 8.037974683544305e-06,
"loss": 0.6351,
"step": 381
},
{
"epoch": 0.02,
"grad_norm": 1.040307879447937,
"learning_rate": 8.059071729957806e-06,
"loss": 0.6979,
"step": 382
},
{
"epoch": 0.02,
"grad_norm": 1.1144888401031494,
"learning_rate": 8.080168776371309e-06,
"loss": 0.733,
"step": 383
},
{
"epoch": 0.02,
"grad_norm": 0.9809250831604004,
"learning_rate": 8.10126582278481e-06,
"loss": 0.7225,
"step": 384
},
{
"epoch": 0.02,
"grad_norm": 1.014562964439392,
"learning_rate": 8.122362869198312e-06,
"loss": 0.6472,
"step": 385
},
{
"epoch": 0.02,
"grad_norm": 1.0310808420181274,
"learning_rate": 8.143459915611815e-06,
"loss": 0.7224,
"step": 386
},
{
"epoch": 0.02,
"grad_norm": 1.026007890701294,
"learning_rate": 8.164556962025318e-06,
"loss": 0.6932,
"step": 387
},
{
"epoch": 0.02,
"grad_norm": 1.062725305557251,
"learning_rate": 8.18565400843882e-06,
"loss": 0.7098,
"step": 388
},
{
"epoch": 0.02,
"grad_norm": 1.1098779439926147,
"learning_rate": 8.20675105485232e-06,
"loss": 0.6994,
"step": 389
},
{
"epoch": 0.02,
"grad_norm": 1.0307683944702148,
"learning_rate": 8.227848101265824e-06,
"loss": 0.784,
"step": 390
},
{
"epoch": 0.02,
"grad_norm": 1.0046746730804443,
"learning_rate": 8.248945147679327e-06,
"loss": 0.6689,
"step": 391
},
{
"epoch": 0.02,
"grad_norm": 1.090871810913086,
"learning_rate": 8.270042194092828e-06,
"loss": 0.6986,
"step": 392
},
{
"epoch": 0.02,
"grad_norm": 0.9817966222763062,
"learning_rate": 8.29113924050633e-06,
"loss": 0.6288,
"step": 393
},
{
"epoch": 0.02,
"grad_norm": 1.004731297492981,
"learning_rate": 8.31223628691983e-06,
"loss": 0.6876,
"step": 394
},
{
"epoch": 0.03,
"grad_norm": 0.9606344103813171,
"learning_rate": 8.333333333333334e-06,
"loss": 0.6722,
"step": 395
},
{
"epoch": 0.03,
"grad_norm": 0.8888660669326782,
"learning_rate": 8.354430379746837e-06,
"loss": 0.6257,
"step": 396
},
{
"epoch": 0.03,
"grad_norm": 1.0444602966308594,
"learning_rate": 8.375527426160338e-06,
"loss": 0.7283,
"step": 397
},
{
"epoch": 0.03,
"grad_norm": 1.0116087198257446,
"learning_rate": 8.39662447257384e-06,
"loss": 0.6951,
"step": 398
},
{
"epoch": 0.03,
"grad_norm": 0.9750226736068726,
"learning_rate": 8.417721518987342e-06,
"loss": 0.6172,
"step": 399
},
{
"epoch": 0.03,
"grad_norm": 0.9671033024787903,
"learning_rate": 8.438818565400846e-06,
"loss": 0.6415,
"step": 400
},
{
"epoch": 0.03,
"grad_norm": 1.0681933164596558,
"learning_rate": 8.459915611814347e-06,
"loss": 0.6908,
"step": 401
},
{
"epoch": 0.03,
"grad_norm": 1.0316442251205444,
"learning_rate": 8.481012658227848e-06,
"loss": 0.6733,
"step": 402
},
{
"epoch": 0.03,
"grad_norm": 1.077885389328003,
"learning_rate": 8.502109704641351e-06,
"loss": 0.7253,
"step": 403
},
{
"epoch": 0.03,
"grad_norm": 1.03955078125,
"learning_rate": 8.523206751054853e-06,
"loss": 0.6548,
"step": 404
},
{
"epoch": 0.03,
"grad_norm": 1.1187814474105835,
"learning_rate": 8.544303797468356e-06,
"loss": 0.6971,
"step": 405
},
{
"epoch": 0.03,
"grad_norm": 1.0932029485702515,
"learning_rate": 8.565400843881857e-06,
"loss": 0.7234,
"step": 406
},
{
"epoch": 0.03,
"grad_norm": 1.0664738416671753,
"learning_rate": 8.586497890295358e-06,
"loss": 0.7203,
"step": 407
},
{
"epoch": 0.03,
"grad_norm": 1.0771609544754028,
"learning_rate": 8.607594936708861e-06,
"loss": 0.6594,
"step": 408
},
{
"epoch": 0.03,
"grad_norm": 1.0817670822143555,
"learning_rate": 8.628691983122364e-06,
"loss": 0.7253,
"step": 409
},
{
"epoch": 0.03,
"grad_norm": 1.0627022981643677,
"learning_rate": 8.649789029535866e-06,
"loss": 0.7192,
"step": 410
},
{
"epoch": 0.03,
"grad_norm": 0.9740765690803528,
"learning_rate": 8.670886075949367e-06,
"loss": 0.7057,
"step": 411
},
{
"epoch": 0.03,
"grad_norm": 0.9944364428520203,
"learning_rate": 8.69198312236287e-06,
"loss": 0.7844,
"step": 412
},
{
"epoch": 0.03,
"grad_norm": 0.9714933633804321,
"learning_rate": 8.713080168776371e-06,
"loss": 0.7249,
"step": 413
},
{
"epoch": 0.03,
"grad_norm": 1.0184885263442993,
"learning_rate": 8.734177215189874e-06,
"loss": 0.647,
"step": 414
},
{
"epoch": 0.03,
"grad_norm": 1.0341577529907227,
"learning_rate": 8.755274261603376e-06,
"loss": 0.6934,
"step": 415
},
{
"epoch": 0.03,
"grad_norm": 1.0786633491516113,
"learning_rate": 8.776371308016879e-06,
"loss": 0.6879,
"step": 416
},
{
"epoch": 0.03,
"grad_norm": 1.0012767314910889,
"learning_rate": 8.79746835443038e-06,
"loss": 0.7064,
"step": 417
},
{
"epoch": 0.03,
"grad_norm": 1.0431938171386719,
"learning_rate": 8.818565400843883e-06,
"loss": 0.7309,
"step": 418
},
{
"epoch": 0.03,
"grad_norm": 0.9838118553161621,
"learning_rate": 8.839662447257385e-06,
"loss": 0.6781,
"step": 419
},
{
"epoch": 0.03,
"grad_norm": 1.0146209001541138,
"learning_rate": 8.860759493670886e-06,
"loss": 0.699,
"step": 420
},
{
"epoch": 0.03,
"grad_norm": 1.0076758861541748,
"learning_rate": 8.881856540084389e-06,
"loss": 0.7561,
"step": 421
},
{
"epoch": 0.03,
"grad_norm": 0.9734664559364319,
"learning_rate": 8.90295358649789e-06,
"loss": 0.6111,
"step": 422
},
{
"epoch": 0.03,
"grad_norm": 0.9639232158660889,
"learning_rate": 8.924050632911393e-06,
"loss": 0.6722,
"step": 423
},
{
"epoch": 0.03,
"grad_norm": 0.9442266225814819,
"learning_rate": 8.945147679324895e-06,
"loss": 0.6588,
"step": 424
},
{
"epoch": 0.03,
"grad_norm": 0.9883629083633423,
"learning_rate": 8.966244725738398e-06,
"loss": 0.6536,
"step": 425
},
{
"epoch": 0.03,
"grad_norm": 1.013269066810608,
"learning_rate": 8.987341772151899e-06,
"loss": 0.6572,
"step": 426
},
{
"epoch": 0.03,
"grad_norm": 1.1119914054870605,
"learning_rate": 9.008438818565402e-06,
"loss": 0.7522,
"step": 427
},
{
"epoch": 0.03,
"grad_norm": 1.1306815147399902,
"learning_rate": 9.029535864978903e-06,
"loss": 0.6473,
"step": 428
},
{
"epoch": 0.03,
"grad_norm": 1.0609233379364014,
"learning_rate": 9.050632911392407e-06,
"loss": 0.6881,
"step": 429
},
{
"epoch": 0.03,
"grad_norm": 1.033166527748108,
"learning_rate": 9.071729957805908e-06,
"loss": 0.7441,
"step": 430
},
{
"epoch": 0.03,
"grad_norm": 1.1041021347045898,
"learning_rate": 9.09282700421941e-06,
"loss": 0.7217,
"step": 431
},
{
"epoch": 0.03,
"grad_norm": 1.1988205909729004,
"learning_rate": 9.113924050632912e-06,
"loss": 0.7092,
"step": 432
},
{
"epoch": 0.03,
"grad_norm": 1.054067611694336,
"learning_rate": 9.135021097046414e-06,
"loss": 0.6541,
"step": 433
},
{
"epoch": 0.03,
"grad_norm": 0.967709481716156,
"learning_rate": 9.156118143459917e-06,
"loss": 0.6873,
"step": 434
},
{
"epoch": 0.03,
"grad_norm": 1.0264620780944824,
"learning_rate": 9.177215189873418e-06,
"loss": 0.6761,
"step": 435
},
{
"epoch": 0.03,
"grad_norm": 1.0361930131912231,
"learning_rate": 9.198312236286921e-06,
"loss": 0.646,
"step": 436
},
{
"epoch": 0.03,
"grad_norm": 0.9842200875282288,
"learning_rate": 9.219409282700422e-06,
"loss": 0.6717,
"step": 437
},
{
"epoch": 0.03,
"grad_norm": 1.0381004810333252,
"learning_rate": 9.240506329113925e-06,
"loss": 0.6861,
"step": 438
},
{
"epoch": 0.03,
"grad_norm": 1.0402230024337769,
"learning_rate": 9.261603375527427e-06,
"loss": 0.7462,
"step": 439
},
{
"epoch": 0.03,
"grad_norm": 1.0186004638671875,
"learning_rate": 9.28270042194093e-06,
"loss": 0.6941,
"step": 440
},
{
"epoch": 0.03,
"grad_norm": 0.9940155148506165,
"learning_rate": 9.303797468354431e-06,
"loss": 0.7359,
"step": 441
},
{
"epoch": 0.03,
"grad_norm": 1.0634286403656006,
"learning_rate": 9.324894514767934e-06,
"loss": 0.7119,
"step": 442
},
{
"epoch": 0.03,
"grad_norm": 1.0823959112167358,
"learning_rate": 9.345991561181435e-06,
"loss": 0.6938,
"step": 443
},
{
"epoch": 0.03,
"grad_norm": 1.054929256439209,
"learning_rate": 9.367088607594937e-06,
"loss": 0.6777,
"step": 444
},
{
"epoch": 0.03,
"grad_norm": 0.9768496751785278,
"learning_rate": 9.38818565400844e-06,
"loss": 0.6919,
"step": 445
},
{
"epoch": 0.03,
"grad_norm": 0.9983486533164978,
"learning_rate": 9.409282700421943e-06,
"loss": 0.6621,
"step": 446
},
{
"epoch": 0.03,
"grad_norm": 1.0734831094741821,
"learning_rate": 9.430379746835444e-06,
"loss": 0.7321,
"step": 447
},
{
"epoch": 0.03,
"grad_norm": 0.9543827772140503,
"learning_rate": 9.451476793248946e-06,
"loss": 0.68,
"step": 448
},
{
"epoch": 0.03,
"grad_norm": 0.9443538188934326,
"learning_rate": 9.472573839662449e-06,
"loss": 0.6456,
"step": 449
},
{
"epoch": 0.03,
"grad_norm": 1.0233805179595947,
"learning_rate": 9.49367088607595e-06,
"loss": 0.7291,
"step": 450
},
{
"epoch": 0.03,
"grad_norm": 0.9053292870521545,
"learning_rate": 9.514767932489453e-06,
"loss": 0.6394,
"step": 451
},
{
"epoch": 0.03,
"grad_norm": 0.9343140125274658,
"learning_rate": 9.535864978902954e-06,
"loss": 0.6988,
"step": 452
},
{
"epoch": 0.03,
"grad_norm": 1.014218807220459,
"learning_rate": 9.556962025316456e-06,
"loss": 0.6681,
"step": 453
},
{
"epoch": 0.03,
"grad_norm": 1.0326037406921387,
"learning_rate": 9.578059071729959e-06,
"loss": 0.7683,
"step": 454
},
{
"epoch": 0.03,
"grad_norm": 1.0093090534210205,
"learning_rate": 9.599156118143462e-06,
"loss": 0.7108,
"step": 455
},
{
"epoch": 0.03,
"grad_norm": 1.0127726793289185,
"learning_rate": 9.620253164556963e-06,
"loss": 0.7319,
"step": 456
},
{
"epoch": 0.03,
"grad_norm": 1.0509214401245117,
"learning_rate": 9.641350210970464e-06,
"loss": 0.7353,
"step": 457
},
{
"epoch": 0.03,
"grad_norm": 1.0279244184494019,
"learning_rate": 9.662447257383967e-06,
"loss": 0.675,
"step": 458
},
{
"epoch": 0.03,
"grad_norm": 1.001551866531372,
"learning_rate": 9.68354430379747e-06,
"loss": 0.619,
"step": 459
},
{
"epoch": 0.03,
"grad_norm": 1.1098026037216187,
"learning_rate": 9.704641350210972e-06,
"loss": 0.7643,
"step": 460
},
{
"epoch": 0.03,
"grad_norm": 1.007842779159546,
"learning_rate": 9.725738396624473e-06,
"loss": 0.6524,
"step": 461
},
{
"epoch": 0.03,
"grad_norm": 1.0313695669174194,
"learning_rate": 9.746835443037975e-06,
"loss": 0.6565,
"step": 462
},
{
"epoch": 0.03,
"grad_norm": 1.0242507457733154,
"learning_rate": 9.767932489451478e-06,
"loss": 0.7165,
"step": 463
},
{
"epoch": 0.03,
"grad_norm": 1.0414601564407349,
"learning_rate": 9.78902953586498e-06,
"loss": 0.6985,
"step": 464
},
{
"epoch": 0.03,
"grad_norm": 1.0553888082504272,
"learning_rate": 9.810126582278482e-06,
"loss": 0.7361,
"step": 465
},
{
"epoch": 0.03,
"grad_norm": 1.0666173696517944,
"learning_rate": 9.831223628691983e-06,
"loss": 0.681,
"step": 466
},
{
"epoch": 0.03,
"grad_norm": 0.92351233959198,
"learning_rate": 9.852320675105486e-06,
"loss": 0.6019,
"step": 467
},
{
"epoch": 0.03,
"grad_norm": 1.0312128067016602,
"learning_rate": 9.87341772151899e-06,
"loss": 0.7481,
"step": 468
},
{
"epoch": 0.03,
"grad_norm": 1.0440195798873901,
"learning_rate": 9.89451476793249e-06,
"loss": 0.6794,
"step": 469
},
{
"epoch": 0.03,
"grad_norm": 1.0302846431732178,
"learning_rate": 9.915611814345992e-06,
"loss": 0.6839,
"step": 470
},
{
"epoch": 0.03,
"grad_norm": 1.0816489458084106,
"learning_rate": 9.936708860759493e-06,
"loss": 0.7631,
"step": 471
},
{
"epoch": 0.03,
"grad_norm": 1.0711013078689575,
"learning_rate": 9.957805907172996e-06,
"loss": 0.7376,
"step": 472
},
{
"epoch": 0.03,
"grad_norm": 0.9822407960891724,
"learning_rate": 9.9789029535865e-06,
"loss": 0.7144,
"step": 473
},
{
"epoch": 0.03,
"grad_norm": 1.0089426040649414,
"learning_rate": 1e-05,
"loss": 0.7019,
"step": 474
},
{
"epoch": 0.03,
"grad_norm": 1.0129270553588867,
"learning_rate": 9.999999894733699e-06,
"loss": 0.7017,
"step": 475
},
{
"epoch": 0.03,
"grad_norm": 1.04535973072052,
"learning_rate": 9.999999578934793e-06,
"loss": 0.7501,
"step": 476
},
{
"epoch": 0.03,
"grad_norm": 0.9517323970794678,
"learning_rate": 9.9999990526033e-06,
"loss": 0.6579,
"step": 477
},
{
"epoch": 0.03,
"grad_norm": 1.0189645290374756,
"learning_rate": 9.99999831573924e-06,
"loss": 0.6476,
"step": 478
},
{
"epoch": 0.03,
"grad_norm": 1.0060005187988281,
"learning_rate": 9.999997368342644e-06,
"loss": 0.7177,
"step": 479
},
{
"epoch": 0.03,
"grad_norm": 1.0013384819030762,
"learning_rate": 9.999996210413553e-06,
"loss": 0.6951,
"step": 480
},
{
"epoch": 0.03,
"grad_norm": 1.0175272226333618,
"learning_rate": 9.999994841952016e-06,
"loss": 0.6826,
"step": 481
},
{
"epoch": 0.03,
"grad_norm": 1.017972707748413,
"learning_rate": 9.99999326295809e-06,
"loss": 0.7456,
"step": 482
},
{
"epoch": 0.03,
"grad_norm": 1.0452282428741455,
"learning_rate": 9.99999147343184e-06,
"loss": 0.6436,
"step": 483
},
{
"epoch": 0.03,
"grad_norm": 1.0681228637695312,
"learning_rate": 9.999989473373344e-06,
"loss": 0.6529,
"step": 484
},
{
"epoch": 0.03,
"grad_norm": 1.0214364528656006,
"learning_rate": 9.999987262782684e-06,
"loss": 0.6911,
"step": 485
},
{
"epoch": 0.03,
"grad_norm": 1.0358929634094238,
"learning_rate": 9.999984841659955e-06,
"loss": 0.7087,
"step": 486
},
{
"epoch": 0.03,
"grad_norm": 1.1352185010910034,
"learning_rate": 9.999982210005258e-06,
"loss": 0.674,
"step": 487
},
{
"epoch": 0.03,
"grad_norm": 0.992149293422699,
"learning_rate": 9.999979367818704e-06,
"loss": 0.6709,
"step": 488
},
{
"epoch": 0.03,
"grad_norm": 0.9382144808769226,
"learning_rate": 9.999976315100412e-06,
"loss": 0.6415,
"step": 489
},
{
"epoch": 0.03,
"grad_norm": 1.002973198890686,
"learning_rate": 9.99997305185051e-06,
"loss": 0.6603,
"step": 490
},
{
"epoch": 0.03,
"grad_norm": 1.0173821449279785,
"learning_rate": 9.999969578069137e-06,
"loss": 0.7378,
"step": 491
},
{
"epoch": 0.03,
"grad_norm": 1.0466855764389038,
"learning_rate": 9.99996589375644e-06,
"loss": 0.6665,
"step": 492
},
{
"epoch": 0.03,
"grad_norm": 1.0387037992477417,
"learning_rate": 9.999961998912573e-06,
"loss": 0.6855,
"step": 493
},
{
"epoch": 0.03,
"grad_norm": 1.0338493585586548,
"learning_rate": 9.999957893537697e-06,
"loss": 0.7131,
"step": 494
},
{
"epoch": 0.03,
"grad_norm": 1.038668155670166,
"learning_rate": 9.999953577631991e-06,
"loss": 0.725,
"step": 495
},
{
"epoch": 0.03,
"grad_norm": 1.1043860912322998,
"learning_rate": 9.999949051195631e-06,
"loss": 0.7042,
"step": 496
},
{
"epoch": 0.03,
"grad_norm": 0.9517979025840759,
"learning_rate": 9.999944314228811e-06,
"loss": 0.6591,
"step": 497
},
{
"epoch": 0.03,
"grad_norm": 0.9950364232063293,
"learning_rate": 9.99993936673173e-06,
"loss": 0.6464,
"step": 498
},
{
"epoch": 0.03,
"grad_norm": 0.9766575694084167,
"learning_rate": 9.999934208704595e-06,
"loss": 0.6531,
"step": 499
},
{
"epoch": 0.03,
"grad_norm": 0.9440507888793945,
"learning_rate": 9.999928840147624e-06,
"loss": 0.6836,
"step": 500
},
{
"epoch": 0.03,
"grad_norm": 0.9839968681335449,
"learning_rate": 9.999923261061043e-06,
"loss": 0.6737,
"step": 501
},
{
"epoch": 0.03,
"grad_norm": 0.9848559498786926,
"learning_rate": 9.999917471445086e-06,
"loss": 0.6518,
"step": 502
},
{
"epoch": 0.03,
"grad_norm": 1.0254359245300293,
"learning_rate": 9.999911471299998e-06,
"loss": 0.6916,
"step": 503
},
{
"epoch": 0.03,
"grad_norm": 1.0536845922470093,
"learning_rate": 9.999905260626033e-06,
"loss": 0.7099,
"step": 504
},
{
"epoch": 0.03,
"grad_norm": 1.1166943311691284,
"learning_rate": 9.99989883942345e-06,
"loss": 0.7506,
"step": 505
},
{
"epoch": 0.03,
"grad_norm": 1.0335949659347534,
"learning_rate": 9.999892207692521e-06,
"loss": 0.666,
"step": 506
},
{
"epoch": 0.03,
"grad_norm": 1.14674711227417,
"learning_rate": 9.999885365433523e-06,
"loss": 0.6926,
"step": 507
},
{
"epoch": 0.03,
"grad_norm": 1.104755163192749,
"learning_rate": 9.999878312646748e-06,
"loss": 0.7511,
"step": 508
},
{
"epoch": 0.03,
"grad_norm": 1.075617790222168,
"learning_rate": 9.999871049332488e-06,
"loss": 0.7566,
"step": 509
},
{
"epoch": 0.03,
"grad_norm": 1.0128673315048218,
"learning_rate": 9.999863575491053e-06,
"loss": 0.7372,
"step": 510
},
{
"epoch": 0.03,
"grad_norm": 1.0448622703552246,
"learning_rate": 9.999855891122754e-06,
"loss": 0.7545,
"step": 511
},
{
"epoch": 0.03,
"grad_norm": 1.0948630571365356,
"learning_rate": 9.999847996227918e-06,
"loss": 0.7373,
"step": 512
},
{
"epoch": 0.03,
"grad_norm": 1.0382819175720215,
"learning_rate": 9.999839890806877e-06,
"loss": 0.7146,
"step": 513
},
{
"epoch": 0.03,
"grad_norm": 1.0122886896133423,
"learning_rate": 9.99983157485997e-06,
"loss": 0.6998,
"step": 514
},
{
"epoch": 0.03,
"grad_norm": 1.016714334487915,
"learning_rate": 9.99982304838755e-06,
"loss": 0.685,
"step": 515
},
{
"epoch": 0.03,
"grad_norm": 1.0722309350967407,
"learning_rate": 9.999814311389973e-06,
"loss": 0.6384,
"step": 516
},
{
"epoch": 0.03,
"grad_norm": 1.0432019233703613,
"learning_rate": 9.99980536386761e-06,
"loss": 0.7179,
"step": 517
},
{
"epoch": 0.03,
"grad_norm": 0.9547827243804932,
"learning_rate": 9.999796205820835e-06,
"loss": 0.6755,
"step": 518
},
{
"epoch": 0.03,
"grad_norm": 1.041869878768921,
"learning_rate": 9.999786837250034e-06,
"loss": 0.6711,
"step": 519
},
{
"epoch": 0.03,
"grad_norm": 1.1629664897918701,
"learning_rate": 9.999777258155604e-06,
"loss": 0.7044,
"step": 520
},
{
"epoch": 0.03,
"grad_norm": 1.1089982986450195,
"learning_rate": 9.999767468537947e-06,
"loss": 0.6838,
"step": 521
},
{
"epoch": 0.03,
"grad_norm": 1.0850619077682495,
"learning_rate": 9.999757468397473e-06,
"loss": 0.703,
"step": 522
},
{
"epoch": 0.03,
"grad_norm": 0.9639663100242615,
"learning_rate": 9.999747257734605e-06,
"loss": 0.6591,
"step": 523
},
{
"epoch": 0.03,
"grad_norm": 1.1336872577667236,
"learning_rate": 9.999736836549773e-06,
"loss": 0.6886,
"step": 524
},
{
"epoch": 0.03,
"grad_norm": 0.9170753955841064,
"learning_rate": 9.999726204843417e-06,
"loss": 0.645,
"step": 525
},
{
"epoch": 0.03,
"grad_norm": 0.9949559569358826,
"learning_rate": 9.999715362615983e-06,
"loss": 0.6407,
"step": 526
},
{
"epoch": 0.03,
"grad_norm": 1.0204219818115234,
"learning_rate": 9.999704309867926e-06,
"loss": 0.7058,
"step": 527
},
{
"epoch": 0.03,
"grad_norm": 0.9866481423377991,
"learning_rate": 9.999693046599715e-06,
"loss": 0.635,
"step": 528
},
{
"epoch": 0.03,
"grad_norm": 1.0090922117233276,
"learning_rate": 9.99968157281182e-06,
"loss": 0.6621,
"step": 529
},
{
"epoch": 0.03,
"grad_norm": 1.0550429821014404,
"learning_rate": 9.999669888504731e-06,
"loss": 0.6731,
"step": 530
},
{
"epoch": 0.03,
"grad_norm": 0.9702327251434326,
"learning_rate": 9.999657993678932e-06,
"loss": 0.6619,
"step": 531
},
{
"epoch": 0.03,
"grad_norm": 0.999877393245697,
"learning_rate": 9.999645888334927e-06,
"loss": 0.7003,
"step": 532
},
{
"epoch": 0.03,
"grad_norm": 0.9928595423698425,
"learning_rate": 9.999633572473228e-06,
"loss": 0.7044,
"step": 533
},
{
"epoch": 0.03,
"grad_norm": 1.1135807037353516,
"learning_rate": 9.999621046094353e-06,
"loss": 0.7308,
"step": 534
},
{
"epoch": 0.03,
"grad_norm": 1.073427438735962,
"learning_rate": 9.999608309198827e-06,
"loss": 0.6319,
"step": 535
},
{
"epoch": 0.03,
"grad_norm": 1.006304144859314,
"learning_rate": 9.999595361787187e-06,
"loss": 0.6534,
"step": 536
},
{
"epoch": 0.03,
"grad_norm": 1.0457631349563599,
"learning_rate": 9.999582203859977e-06,
"loss": 0.6932,
"step": 537
},
{
"epoch": 0.03,
"grad_norm": 1.0090550184249878,
"learning_rate": 9.999568835417755e-06,
"loss": 0.6825,
"step": 538
},
{
"epoch": 0.03,
"grad_norm": 0.9906076192855835,
"learning_rate": 9.99955525646108e-06,
"loss": 0.7105,
"step": 539
},
{
"epoch": 0.03,
"grad_norm": 1.0822291374206543,
"learning_rate": 9.999541466990526e-06,
"loss": 0.6908,
"step": 540
},
{
"epoch": 0.03,
"grad_norm": 0.9858079552650452,
"learning_rate": 9.999527467006674e-06,
"loss": 0.6584,
"step": 541
},
{
"epoch": 0.03,
"grad_norm": 0.99437415599823,
"learning_rate": 9.999513256510112e-06,
"loss": 0.6743,
"step": 542
},
{
"epoch": 0.03,
"grad_norm": 1.1383122205734253,
"learning_rate": 9.999498835501438e-06,
"loss": 0.7183,
"step": 543
},
{
"epoch": 0.03,
"grad_norm": 1.0107368230819702,
"learning_rate": 9.99948420398126e-06,
"loss": 0.7656,
"step": 544
},
{
"epoch": 0.03,
"grad_norm": 1.0026836395263672,
"learning_rate": 9.999469361950195e-06,
"loss": 0.7228,
"step": 545
},
{
"epoch": 0.03,
"grad_norm": 0.9779771566390991,
"learning_rate": 9.999454309408868e-06,
"loss": 0.7003,
"step": 546
},
{
"epoch": 0.03,
"grad_norm": 1.0403019189834595,
"learning_rate": 9.999439046357908e-06,
"loss": 0.6832,
"step": 547
},
{
"epoch": 0.03,
"grad_norm": 0.9707220196723938,
"learning_rate": 9.999423572797964e-06,
"loss": 0.6621,
"step": 548
},
{
"epoch": 0.03,
"grad_norm": 1.0087053775787354,
"learning_rate": 9.999407888729686e-06,
"loss": 0.698,
"step": 549
},
{
"epoch": 0.03,
"grad_norm": 0.9814850091934204,
"learning_rate": 9.999391994153734e-06,
"loss": 0.6615,
"step": 550
},
{
"epoch": 0.03,
"grad_norm": 0.9962078332901001,
"learning_rate": 9.999375889070773e-06,
"loss": 0.6748,
"step": 551
},
{
"epoch": 0.03,
"grad_norm": 0.9213439226150513,
"learning_rate": 9.99935957348149e-06,
"loss": 0.6722,
"step": 552
},
{
"epoch": 0.04,
"grad_norm": 0.9168039560317993,
"learning_rate": 9.999343047386562e-06,
"loss": 0.6371,
"step": 553
},
{
"epoch": 0.04,
"grad_norm": 0.9854353666305542,
"learning_rate": 9.999326310786692e-06,
"loss": 0.6603,
"step": 554
},
{
"epoch": 0.04,
"grad_norm": 1.073123574256897,
"learning_rate": 9.999309363682582e-06,
"loss": 0.6385,
"step": 555
},
{
"epoch": 0.04,
"grad_norm": 1.007665753364563,
"learning_rate": 9.999292206074946e-06,
"loss": 0.6184,
"step": 556
},
{
"epoch": 0.04,
"grad_norm": 1.0501381158828735,
"learning_rate": 9.999274837964507e-06,
"loss": 0.6922,
"step": 557
},
{
"epoch": 0.04,
"grad_norm": 1.044360876083374,
"learning_rate": 9.999257259351995e-06,
"loss": 0.7034,
"step": 558
},
{
"epoch": 0.04,
"grad_norm": 0.970619261264801,
"learning_rate": 9.999239470238151e-06,
"loss": 0.6615,
"step": 559
},
{
"epoch": 0.04,
"grad_norm": 0.9886036515235901,
"learning_rate": 9.999221470623726e-06,
"loss": 0.6686,
"step": 560
},
{
"epoch": 0.04,
"grad_norm": 0.9825366735458374,
"learning_rate": 9.999203260509473e-06,
"loss": 0.6488,
"step": 561
},
{
"epoch": 0.04,
"grad_norm": 0.9640889167785645,
"learning_rate": 9.999184839896163e-06,
"loss": 0.7087,
"step": 562
},
{
"epoch": 0.04,
"grad_norm": 0.9821694493293762,
"learning_rate": 9.99916620878457e-06,
"loss": 0.6697,
"step": 563
},
{
"epoch": 0.04,
"grad_norm": 0.999758780002594,
"learning_rate": 9.99914736717548e-06,
"loss": 0.7016,
"step": 564
},
{
"epoch": 0.04,
"grad_norm": 0.9824835658073425,
"learning_rate": 9.999128315069684e-06,
"loss": 0.6894,
"step": 565
},
{
"epoch": 0.04,
"grad_norm": 1.0214180946350098,
"learning_rate": 9.999109052467986e-06,
"loss": 0.689,
"step": 566
},
{
"epoch": 0.04,
"grad_norm": 0.9945915341377258,
"learning_rate": 9.999089579371195e-06,
"loss": 0.673,
"step": 567
},
{
"epoch": 0.04,
"grad_norm": 1.036805272102356,
"learning_rate": 9.999069895780133e-06,
"loss": 0.6567,
"step": 568
},
{
"epoch": 0.04,
"grad_norm": 1.0524154901504517,
"learning_rate": 9.99905000169563e-06,
"loss": 0.6371,
"step": 569
},
{
"epoch": 0.04,
"grad_norm": 1.024978518486023,
"learning_rate": 9.99902989711852e-06,
"loss": 0.607,
"step": 570
},
{
"epoch": 0.04,
"grad_norm": 0.9848310351371765,
"learning_rate": 9.99900958204965e-06,
"loss": 0.6629,
"step": 571
},
{
"epoch": 0.04,
"grad_norm": 0.9815685749053955,
"learning_rate": 9.99898905648988e-06,
"loss": 0.66,
"step": 572
},
{
"epoch": 0.04,
"grad_norm": 1.0205934047698975,
"learning_rate": 9.998968320440068e-06,
"loss": 0.6797,
"step": 573
},
{
"epoch": 0.04,
"grad_norm": 0.999477207660675,
"learning_rate": 9.998947373901092e-06,
"loss": 0.6605,
"step": 574
},
{
"epoch": 0.04,
"grad_norm": 0.8807664513587952,
"learning_rate": 9.998926216873833e-06,
"loss": 0.6118,
"step": 575
},
{
"epoch": 0.04,
"grad_norm": 0.9839978218078613,
"learning_rate": 9.998904849359179e-06,
"loss": 0.7242,
"step": 576
},
{
"epoch": 0.04,
"grad_norm": 1.015552043914795,
"learning_rate": 9.998883271358033e-06,
"loss": 0.6737,
"step": 577
},
{
"epoch": 0.04,
"grad_norm": 0.9544459581375122,
"learning_rate": 9.998861482871303e-06,
"loss": 0.654,
"step": 578
},
{
"epoch": 0.04,
"grad_norm": 1.0526336431503296,
"learning_rate": 9.998839483899904e-06,
"loss": 0.6855,
"step": 579
},
{
"epoch": 0.04,
"grad_norm": 0.9297081828117371,
"learning_rate": 9.998817274444765e-06,
"loss": 0.671,
"step": 580
},
{
"epoch": 0.04,
"grad_norm": 0.9545259475708008,
"learning_rate": 9.998794854506819e-06,
"loss": 0.6331,
"step": 581
},
{
"epoch": 0.04,
"grad_norm": 0.94922935962677,
"learning_rate": 9.998772224087011e-06,
"loss": 0.6739,
"step": 582
},
{
"epoch": 0.04,
"grad_norm": 0.9858238101005554,
"learning_rate": 9.998749383186296e-06,
"loss": 0.6402,
"step": 583
},
{
"epoch": 0.04,
"grad_norm": 1.0650100708007812,
"learning_rate": 9.998726331805632e-06,
"loss": 0.646,
"step": 584
},
{
"epoch": 0.04,
"grad_norm": 1.0530078411102295,
"learning_rate": 9.998703069945995e-06,
"loss": 0.6362,
"step": 585
},
{
"epoch": 0.04,
"grad_norm": 1.036247968673706,
"learning_rate": 9.998679597608357e-06,
"loss": 0.7401,
"step": 586
},
{
"epoch": 0.04,
"grad_norm": 0.990145206451416,
"learning_rate": 9.998655914793711e-06,
"loss": 0.6178,
"step": 587
},
{
"epoch": 0.04,
"grad_norm": 0.988255500793457,
"learning_rate": 9.998632021503055e-06,
"loss": 0.6291,
"step": 588
},
{
"epoch": 0.04,
"grad_norm": 0.9244970083236694,
"learning_rate": 9.998607917737393e-06,
"loss": 0.6075,
"step": 589
},
{
"epoch": 0.04,
"grad_norm": 1.020477056503296,
"learning_rate": 9.99858360349774e-06,
"loss": 0.6414,
"step": 590
},
{
"epoch": 0.04,
"grad_norm": 1.138732671737671,
"learning_rate": 9.99855907878512e-06,
"loss": 0.6744,
"step": 591
},
{
"epoch": 0.04,
"grad_norm": 1.045698881149292,
"learning_rate": 9.998534343600567e-06,
"loss": 0.6219,
"step": 592
},
{
"epoch": 0.04,
"grad_norm": 0.9490424990653992,
"learning_rate": 9.99850939794512e-06,
"loss": 0.6726,
"step": 593
},
{
"epoch": 0.04,
"grad_norm": 0.944223165512085,
"learning_rate": 9.998484241819833e-06,
"loss": 0.6593,
"step": 594
},
{
"epoch": 0.04,
"grad_norm": 0.941078245639801,
"learning_rate": 9.99845887522576e-06,
"loss": 0.6168,
"step": 595
},
{
"epoch": 0.04,
"grad_norm": 0.9710420966148376,
"learning_rate": 9.998433298163974e-06,
"loss": 0.7174,
"step": 596
},
{
"epoch": 0.04,
"grad_norm": 0.958227276802063,
"learning_rate": 9.99840751063555e-06,
"loss": 0.6664,
"step": 597
},
{
"epoch": 0.04,
"grad_norm": 1.0510207414627075,
"learning_rate": 9.998381512641574e-06,
"loss": 0.6356,
"step": 598
},
{
"epoch": 0.04,
"grad_norm": 1.0442863702774048,
"learning_rate": 9.99835530418314e-06,
"loss": 0.6988,
"step": 599
},
{
"epoch": 0.04,
"grad_norm": 1.0224828720092773,
"learning_rate": 9.998328885261352e-06,
"loss": 0.7323,
"step": 600
},
{
"epoch": 0.04,
"grad_norm": 1.007456660270691,
"learning_rate": 9.998302255877323e-06,
"loss": 0.7505,
"step": 601
},
{
"epoch": 0.04,
"grad_norm": 1.0247341394424438,
"learning_rate": 9.998275416032176e-06,
"loss": 0.6373,
"step": 602
},
{
"epoch": 0.04,
"grad_norm": 0.9564207792282104,
"learning_rate": 9.998248365727037e-06,
"loss": 0.6353,
"step": 603
},
{
"epoch": 0.04,
"grad_norm": 1.015875220298767,
"learning_rate": 9.998221104963047e-06,
"loss": 0.6737,
"step": 604
},
{
"epoch": 0.04,
"grad_norm": 0.9237945079803467,
"learning_rate": 9.998193633741353e-06,
"loss": 0.63,
"step": 605
},
{
"epoch": 0.04,
"grad_norm": 0.961380660533905,
"learning_rate": 9.998165952063113e-06,
"loss": 0.6423,
"step": 606
},
{
"epoch": 0.04,
"grad_norm": 1.033104658126831,
"learning_rate": 9.998138059929493e-06,
"loss": 0.6457,
"step": 607
},
{
"epoch": 0.04,
"grad_norm": 0.9923886060714722,
"learning_rate": 9.998109957341665e-06,
"loss": 0.6845,
"step": 608
},
{
"epoch": 0.04,
"grad_norm": 1.0205433368682861,
"learning_rate": 9.998081644300815e-06,
"loss": 0.6861,
"step": 609
},
{
"epoch": 0.04,
"grad_norm": 1.0005221366882324,
"learning_rate": 9.998053120808133e-06,
"loss": 0.6066,
"step": 610
},
{
"epoch": 0.04,
"grad_norm": 1.06248140335083,
"learning_rate": 9.998024386864821e-06,
"loss": 0.6687,
"step": 611
},
{
"epoch": 0.04,
"grad_norm": 0.9576296806335449,
"learning_rate": 9.99799544247209e-06,
"loss": 0.6272,
"step": 612
},
{
"epoch": 0.04,
"grad_norm": 1.0306636095046997,
"learning_rate": 9.997966287631157e-06,
"loss": 0.6418,
"step": 613
},
{
"epoch": 0.04,
"grad_norm": 0.9254865050315857,
"learning_rate": 9.997936922343253e-06,
"loss": 0.6598,
"step": 614
},
{
"epoch": 0.04,
"grad_norm": 1.0606073141098022,
"learning_rate": 9.997907346609608e-06,
"loss": 0.6819,
"step": 615
},
{
"epoch": 0.04,
"grad_norm": 0.9714949727058411,
"learning_rate": 9.997877560431472e-06,
"loss": 0.6894,
"step": 616
},
{
"epoch": 0.04,
"grad_norm": 0.9910696148872375,
"learning_rate": 9.9978475638101e-06,
"loss": 0.6925,
"step": 617
},
{
"epoch": 0.04,
"grad_norm": 0.986289918422699,
"learning_rate": 9.997817356746751e-06,
"loss": 0.6504,
"step": 618
},
{
"epoch": 0.04,
"grad_norm": 0.9627223014831543,
"learning_rate": 9.9977869392427e-06,
"loss": 0.6327,
"step": 619
},
{
"epoch": 0.04,
"grad_norm": 1.0320593118667603,
"learning_rate": 9.997756311299229e-06,
"loss": 0.711,
"step": 620
},
{
"epoch": 0.04,
"grad_norm": 1.0085593461990356,
"learning_rate": 9.997725472917623e-06,
"loss": 0.6989,
"step": 621
},
{
"epoch": 0.04,
"grad_norm": 0.9825278520584106,
"learning_rate": 9.997694424099184e-06,
"loss": 0.6127,
"step": 622
},
{
"epoch": 0.04,
"grad_norm": 0.9646775126457214,
"learning_rate": 9.99766316484522e-06,
"loss": 0.6,
"step": 623
},
{
"epoch": 0.04,
"grad_norm": 0.963337242603302,
"learning_rate": 9.997631695157043e-06,
"loss": 0.668,
"step": 624
},
{
"epoch": 0.04,
"grad_norm": 0.9412251710891724,
"learning_rate": 9.997600015035982e-06,
"loss": 0.7373,
"step": 625
},
{
"epoch": 0.04,
"grad_norm": 0.9189504981040955,
"learning_rate": 9.99756812448337e-06,
"loss": 0.6178,
"step": 626
},
{
"epoch": 0.04,
"grad_norm": 1.0134235620498657,
"learning_rate": 9.99753602350055e-06,
"loss": 0.7062,
"step": 627
},
{
"epoch": 0.04,
"grad_norm": 0.9411819577217102,
"learning_rate": 9.997503712088873e-06,
"loss": 0.6587,
"step": 628
},
{
"epoch": 0.04,
"grad_norm": 1.1199616193771362,
"learning_rate": 9.9974711902497e-06,
"loss": 0.7078,
"step": 629
},
{
"epoch": 0.04,
"grad_norm": 1.0165560245513916,
"learning_rate": 9.997438457984398e-06,
"loss": 0.7244,
"step": 630
},
{
"epoch": 0.04,
"grad_norm": 0.9950897097587585,
"learning_rate": 9.997405515294349e-06,
"loss": 0.6421,
"step": 631
},
{
"epoch": 0.04,
"grad_norm": 1.0164552927017212,
"learning_rate": 9.99737236218094e-06,
"loss": 0.7025,
"step": 632
},
{
"epoch": 0.04,
"grad_norm": 0.993774950504303,
"learning_rate": 9.997338998645562e-06,
"loss": 0.6816,
"step": 633
},
{
"epoch": 0.04,
"grad_norm": 1.4380717277526855,
"learning_rate": 9.997305424689626e-06,
"loss": 0.6834,
"step": 634
},
{
"epoch": 0.04,
"grad_norm": 0.9469321966171265,
"learning_rate": 9.997271640314542e-06,
"loss": 0.6764,
"step": 635
},
{
"epoch": 0.04,
"grad_norm": 0.992761492729187,
"learning_rate": 9.997237645521733e-06,
"loss": 0.6855,
"step": 636
},
{
"epoch": 0.04,
"grad_norm": 0.998432993888855,
"learning_rate": 9.997203440312632e-06,
"loss": 0.6508,
"step": 637
},
{
"epoch": 0.04,
"grad_norm": 1.0937362909317017,
"learning_rate": 9.997169024688678e-06,
"loss": 0.6324,
"step": 638
},
{
"epoch": 0.04,
"grad_norm": 0.9640938639640808,
"learning_rate": 9.997134398651318e-06,
"loss": 0.644,
"step": 639
},
{
"epoch": 0.04,
"grad_norm": 0.98480224609375,
"learning_rate": 9.997099562202015e-06,
"loss": 0.6983,
"step": 640
},
{
"epoch": 0.04,
"grad_norm": 0.9848492741584778,
"learning_rate": 9.997064515342232e-06,
"loss": 0.6504,
"step": 641
},
{
"epoch": 0.04,
"grad_norm": 1.068900465965271,
"learning_rate": 9.997029258073445e-06,
"loss": 0.6797,
"step": 642
},
{
"epoch": 0.04,
"grad_norm": 0.9294676780700684,
"learning_rate": 9.99699379039714e-06,
"loss": 0.6919,
"step": 643
},
{
"epoch": 0.04,
"grad_norm": 1.054355263710022,
"learning_rate": 9.996958112314811e-06,
"loss": 0.6627,
"step": 644
},
{
"epoch": 0.04,
"grad_norm": 0.9877650737762451,
"learning_rate": 9.996922223827958e-06,
"loss": 0.6641,
"step": 645
},
{
"epoch": 0.04,
"grad_norm": 0.9646854400634766,
"learning_rate": 9.996886124938092e-06,
"loss": 0.689,
"step": 646
},
{
"epoch": 0.04,
"grad_norm": 0.9384362101554871,
"learning_rate": 9.996849815646736e-06,
"loss": 0.6542,
"step": 647
},
{
"epoch": 0.04,
"grad_norm": 0.9519203305244446,
"learning_rate": 9.996813295955417e-06,
"loss": 0.6862,
"step": 648
},
{
"epoch": 0.04,
"grad_norm": 0.9722259640693665,
"learning_rate": 9.996776565865671e-06,
"loss": 0.647,
"step": 649
},
{
"epoch": 0.04,
"grad_norm": 0.9623055458068848,
"learning_rate": 9.996739625379049e-06,
"loss": 0.66,
"step": 650
},
{
"epoch": 0.04,
"grad_norm": 1.0324175357818604,
"learning_rate": 9.9967024744971e-06,
"loss": 0.6562,
"step": 651
},
{
"epoch": 0.04,
"grad_norm": 0.9541144371032715,
"learning_rate": 9.996665113221396e-06,
"loss": 0.6685,
"step": 652
},
{
"epoch": 0.04,
"grad_norm": 0.9493923187255859,
"learning_rate": 9.996627541553504e-06,
"loss": 0.6545,
"step": 653
},
{
"epoch": 0.04,
"grad_norm": 0.8991278409957886,
"learning_rate": 9.996589759495008e-06,
"loss": 0.627,
"step": 654
},
{
"epoch": 0.04,
"grad_norm": 1.066519021987915,
"learning_rate": 9.9965517670475e-06,
"loss": 0.7353,
"step": 655
},
{
"epoch": 0.04,
"grad_norm": 1.0149874687194824,
"learning_rate": 9.996513564212577e-06,
"loss": 0.6611,
"step": 656
},
{
"epoch": 0.04,
"grad_norm": 0.9408120512962341,
"learning_rate": 9.996475150991852e-06,
"loss": 0.6596,
"step": 657
},
{
"epoch": 0.04,
"grad_norm": 1.0212035179138184,
"learning_rate": 9.99643652738694e-06,
"loss": 0.7073,
"step": 658
},
{
"epoch": 0.04,
"grad_norm": 1.066163182258606,
"learning_rate": 9.996397693399465e-06,
"loss": 0.6822,
"step": 659
},
{
"epoch": 0.04,
"grad_norm": 0.9859758615493774,
"learning_rate": 9.996358649031066e-06,
"loss": 0.6691,
"step": 660
},
{
"epoch": 0.04,
"grad_norm": 0.9595489501953125,
"learning_rate": 9.996319394283384e-06,
"loss": 0.6466,
"step": 661
},
{
"epoch": 0.04,
"grad_norm": 1.0166726112365723,
"learning_rate": 9.996279929158074e-06,
"loss": 0.7591,
"step": 662
},
{
"epoch": 0.04,
"grad_norm": 1.0264617204666138,
"learning_rate": 9.996240253656796e-06,
"loss": 0.6632,
"step": 663
},
{
"epoch": 0.04,
"grad_norm": 0.9634756445884705,
"learning_rate": 9.996200367781224e-06,
"loss": 0.6782,
"step": 664
},
{
"epoch": 0.04,
"grad_norm": 0.9677413105964661,
"learning_rate": 9.996160271533033e-06,
"loss": 0.6686,
"step": 665
},
{
"epoch": 0.04,
"grad_norm": 1.0524028539657593,
"learning_rate": 9.996119964913914e-06,
"loss": 0.6577,
"step": 666
},
{
"epoch": 0.04,
"grad_norm": 1.0887914896011353,
"learning_rate": 9.996079447925563e-06,
"loss": 0.7091,
"step": 667
},
{
"epoch": 0.04,
"grad_norm": 1.1000274419784546,
"learning_rate": 9.996038720569688e-06,
"loss": 0.6642,
"step": 668
},
{
"epoch": 0.04,
"grad_norm": 0.8987544775009155,
"learning_rate": 9.995997782848e-06,
"loss": 0.6831,
"step": 669
},
{
"epoch": 0.04,
"grad_norm": 0.9039768576622009,
"learning_rate": 9.995956634762227e-06,
"loss": 0.6322,
"step": 670
},
{
"epoch": 0.04,
"grad_norm": 0.9678673148155212,
"learning_rate": 9.995915276314099e-06,
"loss": 0.6755,
"step": 671
},
{
"epoch": 0.04,
"grad_norm": 1.0128499269485474,
"learning_rate": 9.995873707505358e-06,
"loss": 0.6625,
"step": 672
},
{
"epoch": 0.04,
"grad_norm": 0.9772645235061646,
"learning_rate": 9.995831928337756e-06,
"loss": 0.6798,
"step": 673
},
{
"epoch": 0.04,
"grad_norm": 0.9836506843566895,
"learning_rate": 9.99578993881305e-06,
"loss": 0.7197,
"step": 674
},
{
"epoch": 0.04,
"grad_norm": 1.006994605064392,
"learning_rate": 9.995747738933009e-06,
"loss": 0.6148,
"step": 675
},
{
"epoch": 0.04,
"grad_norm": 0.9389720559120178,
"learning_rate": 9.995705328699408e-06,
"loss": 0.7033,
"step": 676
},
{
"epoch": 0.04,
"grad_norm": 1.027573823928833,
"learning_rate": 9.995662708114036e-06,
"loss": 0.7308,
"step": 677
},
{
"epoch": 0.04,
"grad_norm": 1.0021096467971802,
"learning_rate": 9.995619877178685e-06,
"loss": 0.6952,
"step": 678
},
{
"epoch": 0.04,
"grad_norm": 0.9737790822982788,
"learning_rate": 9.99557683589516e-06,
"loss": 0.741,
"step": 679
},
{
"epoch": 0.04,
"grad_norm": 0.9685630202293396,
"learning_rate": 9.995533584265273e-06,
"loss": 0.6778,
"step": 680
},
{
"epoch": 0.04,
"grad_norm": 1.057919979095459,
"learning_rate": 9.995490122290845e-06,
"loss": 0.7612,
"step": 681
},
{
"epoch": 0.04,
"grad_norm": 0.9825607538223267,
"learning_rate": 9.995446449973705e-06,
"loss": 0.6765,
"step": 682
},
{
"epoch": 0.04,
"grad_norm": 0.9538717269897461,
"learning_rate": 9.995402567315695e-06,
"loss": 0.6369,
"step": 683
},
{
"epoch": 0.04,
"grad_norm": 0.9630232453346252,
"learning_rate": 9.99535847431866e-06,
"loss": 0.6993,
"step": 684
},
{
"epoch": 0.04,
"grad_norm": 0.9161614179611206,
"learning_rate": 9.995314170984457e-06,
"loss": 0.6651,
"step": 685
},
{
"epoch": 0.04,
"grad_norm": 1.0125501155853271,
"learning_rate": 9.99526965731495e-06,
"loss": 0.6547,
"step": 686
},
{
"epoch": 0.04,
"grad_norm": 0.9450063109397888,
"learning_rate": 9.995224933312016e-06,
"loss": 0.6821,
"step": 687
},
{
"epoch": 0.04,
"grad_norm": 0.9932665228843689,
"learning_rate": 9.995179998977537e-06,
"loss": 0.6668,
"step": 688
},
{
"epoch": 0.04,
"grad_norm": 1.0881638526916504,
"learning_rate": 9.995134854313407e-06,
"loss": 0.6555,
"step": 689
},
{
"epoch": 0.04,
"grad_norm": 1.0094410181045532,
"learning_rate": 9.995089499321521e-06,
"loss": 0.7013,
"step": 690
},
{
"epoch": 0.04,
"grad_norm": 0.9489861726760864,
"learning_rate": 9.995043934003796e-06,
"loss": 0.6614,
"step": 691
},
{
"epoch": 0.04,
"grad_norm": 0.9359292984008789,
"learning_rate": 9.994998158362148e-06,
"loss": 0.6638,
"step": 692
},
{
"epoch": 0.04,
"grad_norm": 1.031830906867981,
"learning_rate": 9.994952172398502e-06,
"loss": 0.7097,
"step": 693
},
{
"epoch": 0.04,
"grad_norm": 0.9993519186973572,
"learning_rate": 9.994905976114799e-06,
"loss": 0.7518,
"step": 694
},
{
"epoch": 0.04,
"grad_norm": 0.9693325757980347,
"learning_rate": 9.994859569512978e-06,
"loss": 0.6672,
"step": 695
},
{
"epoch": 0.04,
"grad_norm": 0.9676175117492676,
"learning_rate": 9.994812952594998e-06,
"loss": 0.6357,
"step": 696
},
{
"epoch": 0.04,
"grad_norm": 0.9201942682266235,
"learning_rate": 9.994766125362821e-06,
"loss": 0.6413,
"step": 697
},
{
"epoch": 0.04,
"grad_norm": 0.9743843674659729,
"learning_rate": 9.994719087818416e-06,
"loss": 0.6913,
"step": 698
},
{
"epoch": 0.04,
"grad_norm": 0.9933353066444397,
"learning_rate": 9.994671839963766e-06,
"loss": 0.645,
"step": 699
},
{
"epoch": 0.04,
"grad_norm": 0.9530336856842041,
"learning_rate": 9.994624381800861e-06,
"loss": 0.6292,
"step": 700
},
{
"epoch": 0.04,
"grad_norm": 0.9179254770278931,
"learning_rate": 9.994576713331699e-06,
"loss": 0.6347,
"step": 701
},
{
"epoch": 0.04,
"grad_norm": 1.0408787727355957,
"learning_rate": 9.994528834558285e-06,
"loss": 0.7041,
"step": 702
},
{
"epoch": 0.04,
"grad_norm": 0.9889572262763977,
"learning_rate": 9.994480745482636e-06,
"loss": 0.7333,
"step": 703
},
{
"epoch": 0.04,
"grad_norm": 0.9241334795951843,
"learning_rate": 9.99443244610678e-06,
"loss": 0.6409,
"step": 704
},
{
"epoch": 0.04,
"grad_norm": 0.9185009598731995,
"learning_rate": 9.994383936432745e-06,
"loss": 0.6136,
"step": 705
},
{
"epoch": 0.04,
"grad_norm": 1.019323468208313,
"learning_rate": 9.994335216462579e-06,
"loss": 0.6535,
"step": 706
},
{
"epoch": 0.04,
"grad_norm": 0.9755436182022095,
"learning_rate": 9.99428628619833e-06,
"loss": 0.666,
"step": 707
},
{
"epoch": 0.04,
"grad_norm": 1.0832858085632324,
"learning_rate": 9.994237145642058e-06,
"loss": 0.731,
"step": 708
},
{
"epoch": 0.04,
"grad_norm": 1.036895513534546,
"learning_rate": 9.994187794795835e-06,
"loss": 0.6424,
"step": 709
},
{
"epoch": 0.04,
"grad_norm": 1.0125101804733276,
"learning_rate": 9.994138233661737e-06,
"loss": 0.7033,
"step": 710
},
{
"epoch": 0.05,
"grad_norm": 0.9705720543861389,
"learning_rate": 9.994088462241851e-06,
"loss": 0.7013,
"step": 711
},
{
"epoch": 0.05,
"grad_norm": 0.9837139248847961,
"learning_rate": 9.994038480538274e-06,
"loss": 0.6626,
"step": 712
},
{
"epoch": 0.05,
"grad_norm": 1.0453715324401855,
"learning_rate": 9.993988288553109e-06,
"loss": 0.6823,
"step": 713
},
{
"epoch": 0.05,
"grad_norm": 0.9410306811332703,
"learning_rate": 9.993937886288471e-06,
"loss": 0.6091,
"step": 714
},
{
"epoch": 0.05,
"grad_norm": 1.074299693107605,
"learning_rate": 9.99388727374648e-06,
"loss": 0.7144,
"step": 715
},
{
"epoch": 0.05,
"grad_norm": 0.9971576929092407,
"learning_rate": 9.993836450929268e-06,
"loss": 0.6564,
"step": 716
},
{
"epoch": 0.05,
"grad_norm": 0.9908046126365662,
"learning_rate": 9.993785417838978e-06,
"loss": 0.5828,
"step": 717
},
{
"epoch": 0.05,
"grad_norm": 0.985888659954071,
"learning_rate": 9.993734174477752e-06,
"loss": 0.6428,
"step": 718
},
{
"epoch": 0.05,
"grad_norm": 0.9888742566108704,
"learning_rate": 9.993682720847755e-06,
"loss": 0.6407,
"step": 719
},
{
"epoch": 0.05,
"grad_norm": 1.1355136632919312,
"learning_rate": 9.99363105695115e-06,
"loss": 0.6762,
"step": 720
},
{
"epoch": 0.05,
"grad_norm": 1.0007946491241455,
"learning_rate": 9.993579182790111e-06,
"loss": 0.6832,
"step": 721
},
{
"epoch": 0.05,
"grad_norm": 0.9047017097473145,
"learning_rate": 9.993527098366826e-06,
"loss": 0.6187,
"step": 722
},
{
"epoch": 0.05,
"grad_norm": 1.020936369895935,
"learning_rate": 9.993474803683486e-06,
"loss": 0.7007,
"step": 723
},
{
"epoch": 0.05,
"grad_norm": 1.0052990913391113,
"learning_rate": 9.993422298742293e-06,
"loss": 0.6472,
"step": 724
},
{
"epoch": 0.05,
"grad_norm": 0.9748853445053101,
"learning_rate": 9.993369583545456e-06,
"loss": 0.6705,
"step": 725
},
{
"epoch": 0.05,
"grad_norm": 1.0031288862228394,
"learning_rate": 9.993316658095198e-06,
"loss": 0.6891,
"step": 726
},
{
"epoch": 0.05,
"grad_norm": 1.018814206123352,
"learning_rate": 9.993263522393745e-06,
"loss": 0.7134,
"step": 727
},
{
"epoch": 0.05,
"grad_norm": 0.9574296474456787,
"learning_rate": 9.993210176443338e-06,
"loss": 0.6798,
"step": 728
},
{
"epoch": 0.05,
"grad_norm": 1.0445259809494019,
"learning_rate": 9.993156620246219e-06,
"loss": 0.7278,
"step": 729
},
{
"epoch": 0.05,
"grad_norm": 0.9974734783172607,
"learning_rate": 9.993102853804643e-06,
"loss": 0.687,
"step": 730
},
{
"epoch": 0.05,
"grad_norm": 0.9887290000915527,
"learning_rate": 9.993048877120876e-06,
"loss": 0.6525,
"step": 731
},
{
"epoch": 0.05,
"grad_norm": 1.0144176483154297,
"learning_rate": 9.992994690197192e-06,
"loss": 0.6687,
"step": 732
},
{
"epoch": 0.05,
"grad_norm": 0.9862350225448608,
"learning_rate": 9.992940293035871e-06,
"loss": 0.6795,
"step": 733
},
{
"epoch": 0.05,
"grad_norm": 0.9350804090499878,
"learning_rate": 9.992885685639203e-06,
"loss": 0.6152,
"step": 734
},
{
"epoch": 0.05,
"grad_norm": 0.937683641910553,
"learning_rate": 9.992830868009487e-06,
"loss": 0.6963,
"step": 735
},
{
"epoch": 0.05,
"grad_norm": 0.9010510444641113,
"learning_rate": 9.992775840149031e-06,
"loss": 0.6196,
"step": 736
},
{
"epoch": 0.05,
"grad_norm": 0.9523539543151855,
"learning_rate": 9.992720602060155e-06,
"loss": 0.6837,
"step": 737
},
{
"epoch": 0.05,
"grad_norm": 1.0255656242370605,
"learning_rate": 9.992665153745182e-06,
"loss": 0.7196,
"step": 738
},
{
"epoch": 0.05,
"grad_norm": 0.9392181038856506,
"learning_rate": 9.992609495206448e-06,
"loss": 0.7169,
"step": 739
},
{
"epoch": 0.05,
"grad_norm": 0.9734467267990112,
"learning_rate": 9.992553626446296e-06,
"loss": 0.6623,
"step": 740
},
{
"epoch": 0.05,
"grad_norm": 1.0188223123550415,
"learning_rate": 9.992497547467079e-06,
"loss": 0.7326,
"step": 741
},
{
"epoch": 0.05,
"grad_norm": 0.9624093770980835,
"learning_rate": 9.992441258271157e-06,
"loss": 0.6977,
"step": 742
},
{
"epoch": 0.05,
"grad_norm": 1.082211971282959,
"learning_rate": 9.992384758860902e-06,
"loss": 0.6825,
"step": 743
},
{
"epoch": 0.05,
"grad_norm": 0.9943745732307434,
"learning_rate": 9.99232804923869e-06,
"loss": 0.6755,
"step": 744
},
{
"epoch": 0.05,
"grad_norm": 1.016251564025879,
"learning_rate": 9.992271129406914e-06,
"loss": 0.6997,
"step": 745
},
{
"epoch": 0.05,
"grad_norm": 0.9673280715942383,
"learning_rate": 9.992213999367965e-06,
"loss": 0.6589,
"step": 746
},
{
"epoch": 0.05,
"grad_norm": 0.984897255897522,
"learning_rate": 9.992156659124253e-06,
"loss": 0.6589,
"step": 747
},
{
"epoch": 0.05,
"grad_norm": 0.991965651512146,
"learning_rate": 9.99209910867819e-06,
"loss": 0.6864,
"step": 748
},
{
"epoch": 0.05,
"grad_norm": 0.8924134373664856,
"learning_rate": 9.9920413480322e-06,
"loss": 0.6364,
"step": 749
},
{
"epoch": 0.05,
"grad_norm": 0.9772643446922302,
"learning_rate": 9.991983377188715e-06,
"loss": 0.6503,
"step": 750
},
{
"epoch": 0.05,
"grad_norm": 0.9954730868339539,
"learning_rate": 9.991925196150174e-06,
"loss": 0.6672,
"step": 751
},
{
"epoch": 0.05,
"grad_norm": 1.0930321216583252,
"learning_rate": 9.99186680491903e-06,
"loss": 0.6267,
"step": 752
},
{
"epoch": 0.05,
"grad_norm": 0.9928365349769592,
"learning_rate": 9.99180820349774e-06,
"loss": 0.7189,
"step": 753
},
{
"epoch": 0.05,
"grad_norm": 0.9924033880233765,
"learning_rate": 9.991749391888772e-06,
"loss": 0.7041,
"step": 754
},
{
"epoch": 0.05,
"grad_norm": 1.0099812746047974,
"learning_rate": 9.991690370094603e-06,
"loss": 0.7206,
"step": 755
},
{
"epoch": 0.05,
"grad_norm": 1.0289372205734253,
"learning_rate": 9.991631138117715e-06,
"loss": 0.7392,
"step": 756
},
{
"epoch": 0.05,
"grad_norm": 1.0022187232971191,
"learning_rate": 9.991571695960606e-06,
"loss": 0.6903,
"step": 757
},
{
"epoch": 0.05,
"grad_norm": 0.8819312453269958,
"learning_rate": 9.991512043625777e-06,
"loss": 0.6078,
"step": 758
},
{
"epoch": 0.05,
"grad_norm": 0.9569171071052551,
"learning_rate": 9.991452181115739e-06,
"loss": 0.6521,
"step": 759
},
{
"epoch": 0.05,
"grad_norm": 0.9458112120628357,
"learning_rate": 9.991392108433016e-06,
"loss": 0.7201,
"step": 760
},
{
"epoch": 0.05,
"grad_norm": 0.9446436762809753,
"learning_rate": 9.991331825580132e-06,
"loss": 0.7091,
"step": 761
},
{
"epoch": 0.05,
"grad_norm": 1.0510190725326538,
"learning_rate": 9.99127133255963e-06,
"loss": 0.6936,
"step": 762
},
{
"epoch": 0.05,
"grad_norm": 0.9923396110534668,
"learning_rate": 9.991210629374058e-06,
"loss": 0.65,
"step": 763
},
{
"epoch": 0.05,
"grad_norm": 0.9962745904922485,
"learning_rate": 9.991149716025967e-06,
"loss": 0.6632,
"step": 764
},
{
"epoch": 0.05,
"grad_norm": 0.9799250960350037,
"learning_rate": 9.991088592517924e-06,
"loss": 0.6631,
"step": 765
},
{
"epoch": 0.05,
"grad_norm": 0.922505795955658,
"learning_rate": 9.991027258852505e-06,
"loss": 0.6594,
"step": 766
},
{
"epoch": 0.05,
"grad_norm": 0.919065535068512,
"learning_rate": 9.990965715032289e-06,
"loss": 0.5974,
"step": 767
},
{
"epoch": 0.05,
"grad_norm": 0.9268805384635925,
"learning_rate": 9.99090396105987e-06,
"loss": 0.6345,
"step": 768
},
{
"epoch": 0.05,
"grad_norm": 0.9739397168159485,
"learning_rate": 9.990841996937846e-06,
"loss": 0.6757,
"step": 769
},
{
"epoch": 0.05,
"grad_norm": 1.0037983655929565,
"learning_rate": 9.990779822668827e-06,
"loss": 0.714,
"step": 770
},
{
"epoch": 0.05,
"grad_norm": 0.9694761633872986,
"learning_rate": 9.990717438255435e-06,
"loss": 0.6614,
"step": 771
},
{
"epoch": 0.05,
"grad_norm": 0.962847113609314,
"learning_rate": 9.99065484370029e-06,
"loss": 0.7379,
"step": 772
},
{
"epoch": 0.05,
"grad_norm": 0.9772030115127563,
"learning_rate": 9.99059203900603e-06,
"loss": 0.651,
"step": 773
},
{
"epoch": 0.05,
"grad_norm": 0.9414916634559631,
"learning_rate": 9.990529024175303e-06,
"loss": 0.5766,
"step": 774
},
{
"epoch": 0.05,
"grad_norm": 0.970845103263855,
"learning_rate": 9.990465799210757e-06,
"loss": 0.6302,
"step": 775
},
{
"epoch": 0.05,
"grad_norm": 1.0166348218917847,
"learning_rate": 9.99040236411506e-06,
"loss": 0.745,
"step": 776
},
{
"epoch": 0.05,
"grad_norm": 1.0761529207229614,
"learning_rate": 9.990338718890878e-06,
"loss": 0.6747,
"step": 777
},
{
"epoch": 0.05,
"grad_norm": 0.9640312194824219,
"learning_rate": 9.990274863540891e-06,
"loss": 0.6303,
"step": 778
},
{
"epoch": 0.05,
"grad_norm": 1.0130929946899414,
"learning_rate": 9.990210798067792e-06,
"loss": 0.6996,
"step": 779
},
{
"epoch": 0.05,
"grad_norm": 1.0341368913650513,
"learning_rate": 9.990146522474273e-06,
"loss": 0.6914,
"step": 780
},
{
"epoch": 0.05,
"grad_norm": 0.9964221119880676,
"learning_rate": 9.990082036763046e-06,
"loss": 0.6798,
"step": 781
},
{
"epoch": 0.05,
"grad_norm": 0.9139377474784851,
"learning_rate": 9.990017340936823e-06,
"loss": 0.6492,
"step": 782
},
{
"epoch": 0.05,
"grad_norm": 0.9424077868461609,
"learning_rate": 9.989952434998328e-06,
"loss": 0.7023,
"step": 783
},
{
"epoch": 0.05,
"grad_norm": 0.9752070307731628,
"learning_rate": 9.989887318950295e-06,
"loss": 0.6834,
"step": 784
},
{
"epoch": 0.05,
"grad_norm": 0.9262385964393616,
"learning_rate": 9.989821992795467e-06,
"loss": 0.667,
"step": 785
},
{
"epoch": 0.05,
"grad_norm": 0.9452196955680847,
"learning_rate": 9.989756456536593e-06,
"loss": 0.6285,
"step": 786
},
{
"epoch": 0.05,
"grad_norm": 1.0110949277877808,
"learning_rate": 9.989690710176433e-06,
"loss": 0.6696,
"step": 787
},
{
"epoch": 0.05,
"grad_norm": 0.9881210327148438,
"learning_rate": 9.989624753717752e-06,
"loss": 0.6981,
"step": 788
},
{
"epoch": 0.05,
"grad_norm": 0.9513158798217773,
"learning_rate": 9.989558587163332e-06,
"loss": 0.6507,
"step": 789
},
{
"epoch": 0.05,
"grad_norm": 0.9474494457244873,
"learning_rate": 9.989492210515958e-06,
"loss": 0.6531,
"step": 790
},
{
"epoch": 0.05,
"grad_norm": 0.9730693101882935,
"learning_rate": 9.989425623778423e-06,
"loss": 0.6714,
"step": 791
},
{
"epoch": 0.05,
"grad_norm": 1.0155802965164185,
"learning_rate": 9.989358826953533e-06,
"loss": 0.6823,
"step": 792
},
{
"epoch": 0.05,
"grad_norm": 0.9369945526123047,
"learning_rate": 9.989291820044099e-06,
"loss": 0.6641,
"step": 793
},
{
"epoch": 0.05,
"grad_norm": 0.9431589245796204,
"learning_rate": 9.989224603052943e-06,
"loss": 0.6021,
"step": 794
},
{
"epoch": 0.05,
"grad_norm": 1.0468026399612427,
"learning_rate": 9.989157175982896e-06,
"loss": 0.7079,
"step": 795
},
{
"epoch": 0.05,
"grad_norm": 0.9889234900474548,
"learning_rate": 9.989089538836795e-06,
"loss": 0.6192,
"step": 796
},
{
"epoch": 0.05,
"grad_norm": 0.9767878651618958,
"learning_rate": 9.98902169161749e-06,
"loss": 0.6864,
"step": 797
},
{
"epoch": 0.05,
"grad_norm": 0.9971264004707336,
"learning_rate": 9.988953634327836e-06,
"loss": 0.7311,
"step": 798
},
{
"epoch": 0.05,
"grad_norm": 0.9408350586891174,
"learning_rate": 9.988885366970701e-06,
"loss": 0.7097,
"step": 799
},
{
"epoch": 0.05,
"grad_norm": 0.9743333458900452,
"learning_rate": 9.988816889548958e-06,
"loss": 0.6899,
"step": 800
},
{
"epoch": 0.05,
"grad_norm": 0.943428635597229,
"learning_rate": 9.98874820206549e-06,
"loss": 0.6598,
"step": 801
},
{
"epoch": 0.05,
"grad_norm": 0.9481057524681091,
"learning_rate": 9.988679304523192e-06,
"loss": 0.6171,
"step": 802
},
{
"epoch": 0.05,
"grad_norm": 0.9587128758430481,
"learning_rate": 9.988610196924962e-06,
"loss": 0.6731,
"step": 803
},
{
"epoch": 0.05,
"grad_norm": 0.9046414494514465,
"learning_rate": 9.98854087927371e-06,
"loss": 0.6688,
"step": 804
},
{
"epoch": 0.05,
"grad_norm": 0.9200078248977661,
"learning_rate": 9.988471351572355e-06,
"loss": 0.6444,
"step": 805
},
{
"epoch": 0.05,
"grad_norm": 1.0290337800979614,
"learning_rate": 9.988401613823825e-06,
"loss": 0.6902,
"step": 806
},
{
"epoch": 0.05,
"grad_norm": 0.9850117564201355,
"learning_rate": 9.988331666031056e-06,
"loss": 0.6844,
"step": 807
},
{
"epoch": 0.05,
"grad_norm": 1.0165541172027588,
"learning_rate": 9.988261508196994e-06,
"loss": 0.6588,
"step": 808
},
{
"epoch": 0.05,
"grad_norm": 0.9509625434875488,
"learning_rate": 9.988191140324595e-06,
"loss": 0.6395,
"step": 809
},
{
"epoch": 0.05,
"grad_norm": 0.8759293556213379,
"learning_rate": 9.988120562416817e-06,
"loss": 0.6624,
"step": 810
},
{
"epoch": 0.05,
"grad_norm": 1.0014184713363647,
"learning_rate": 9.988049774476636e-06,
"loss": 0.658,
"step": 811
},
{
"epoch": 0.05,
"grad_norm": 0.9247162938117981,
"learning_rate": 9.98797877650703e-06,
"loss": 0.6577,
"step": 812
},
{
"epoch": 0.05,
"grad_norm": 0.9179931879043579,
"learning_rate": 9.987907568510991e-06,
"loss": 0.6404,
"step": 813
},
{
"epoch": 0.05,
"grad_norm": 1.043821930885315,
"learning_rate": 9.987836150491515e-06,
"loss": 0.708,
"step": 814
},
{
"epoch": 0.05,
"grad_norm": 1.0630831718444824,
"learning_rate": 9.98776452245161e-06,
"loss": 0.6716,
"step": 815
},
{
"epoch": 0.05,
"grad_norm": 0.825951874256134,
"learning_rate": 9.987692684394294e-06,
"loss": 0.6198,
"step": 816
},
{
"epoch": 0.05,
"grad_norm": 1.03840172290802,
"learning_rate": 9.987620636322589e-06,
"loss": 0.6358,
"step": 817
},
{
"epoch": 0.05,
"grad_norm": 0.9848338961601257,
"learning_rate": 9.987548378239529e-06,
"loss": 0.7417,
"step": 818
},
{
"epoch": 0.05,
"grad_norm": 1.0349116325378418,
"learning_rate": 9.987475910148156e-06,
"loss": 0.6809,
"step": 819
},
{
"epoch": 0.05,
"grad_norm": 0.8631666898727417,
"learning_rate": 9.987403232051525e-06,
"loss": 0.6175,
"step": 820
},
{
"epoch": 0.05,
"grad_norm": 0.972474992275238,
"learning_rate": 9.987330343952692e-06,
"loss": 0.6336,
"step": 821
},
{
"epoch": 0.05,
"grad_norm": 0.9713488817214966,
"learning_rate": 9.987257245854729e-06,
"loss": 0.6455,
"step": 822
},
{
"epoch": 0.05,
"grad_norm": 0.9748914241790771,
"learning_rate": 9.987183937760713e-06,
"loss": 0.6871,
"step": 823
},
{
"epoch": 0.05,
"grad_norm": 0.9042195081710815,
"learning_rate": 9.98711041967373e-06,
"loss": 0.674,
"step": 824
},
{
"epoch": 0.05,
"grad_norm": 0.9888205528259277,
"learning_rate": 9.987036691596877e-06,
"loss": 0.6587,
"step": 825
},
{
"epoch": 0.05,
"grad_norm": 1.0192331075668335,
"learning_rate": 9.986962753533257e-06,
"loss": 0.6968,
"step": 826
},
{
"epoch": 0.05,
"grad_norm": 0.9436393976211548,
"learning_rate": 9.986888605485983e-06,
"loss": 0.615,
"step": 827
},
{
"epoch": 0.05,
"grad_norm": 0.9654821753501892,
"learning_rate": 9.986814247458177e-06,
"loss": 0.6473,
"step": 828
},
{
"epoch": 0.05,
"grad_norm": 1.0658471584320068,
"learning_rate": 9.986739679452973e-06,
"loss": 0.7004,
"step": 829
},
{
"epoch": 0.05,
"grad_norm": 0.9651502966880798,
"learning_rate": 9.986664901473508e-06,
"loss": 0.5893,
"step": 830
},
{
"epoch": 0.05,
"grad_norm": 0.9251554608345032,
"learning_rate": 9.98658991352293e-06,
"loss": 0.6431,
"step": 831
},
{
"epoch": 0.05,
"grad_norm": 0.973141610622406,
"learning_rate": 9.986514715604401e-06,
"loss": 0.6458,
"step": 832
},
{
"epoch": 0.05,
"grad_norm": 1.0335615873336792,
"learning_rate": 9.986439307721083e-06,
"loss": 0.6615,
"step": 833
},
{
"epoch": 0.05,
"grad_norm": 1.0050101280212402,
"learning_rate": 9.98636368987615e-06,
"loss": 0.6729,
"step": 834
},
{
"epoch": 0.05,
"grad_norm": 0.9446513652801514,
"learning_rate": 9.98628786207279e-06,
"loss": 0.6221,
"step": 835
},
{
"epoch": 0.05,
"grad_norm": 0.9160159230232239,
"learning_rate": 9.986211824314193e-06,
"loss": 0.6568,
"step": 836
},
{
"epoch": 0.05,
"grad_norm": 0.9735708236694336,
"learning_rate": 9.986135576603564e-06,
"loss": 0.7023,
"step": 837
},
{
"epoch": 0.05,
"grad_norm": 0.996498167514801,
"learning_rate": 9.98605911894411e-06,
"loss": 0.6713,
"step": 838
},
{
"epoch": 0.05,
"grad_norm": 1.0343009233474731,
"learning_rate": 9.985982451339054e-06,
"loss": 0.6776,
"step": 839
},
{
"epoch": 0.05,
"grad_norm": 0.9924929141998291,
"learning_rate": 9.985905573791619e-06,
"loss": 0.6904,
"step": 840
},
{
"epoch": 0.05,
"grad_norm": 0.9932686686515808,
"learning_rate": 9.985828486305046e-06,
"loss": 0.692,
"step": 841
},
{
"epoch": 0.05,
"grad_norm": 0.9492565989494324,
"learning_rate": 9.98575118888258e-06,
"loss": 0.6513,
"step": 842
},
{
"epoch": 0.05,
"grad_norm": 0.9628197550773621,
"learning_rate": 9.985673681527474e-06,
"loss": 0.6683,
"step": 843
},
{
"epoch": 0.05,
"grad_norm": 1.0421559810638428,
"learning_rate": 9.985595964242996e-06,
"loss": 0.6264,
"step": 844
},
{
"epoch": 0.05,
"grad_norm": 1.0385863780975342,
"learning_rate": 9.985518037032413e-06,
"loss": 0.7485,
"step": 845
},
{
"epoch": 0.05,
"grad_norm": 0.9698561429977417,
"learning_rate": 9.98543989989901e-06,
"loss": 0.6673,
"step": 846
},
{
"epoch": 0.05,
"grad_norm": 1.1064468622207642,
"learning_rate": 9.985361552846076e-06,
"loss": 0.679,
"step": 847
},
{
"epoch": 0.05,
"grad_norm": 0.9422234296798706,
"learning_rate": 9.98528299587691e-06,
"loss": 0.621,
"step": 848
},
{
"epoch": 0.05,
"grad_norm": 0.9520366787910461,
"learning_rate": 9.98520422899482e-06,
"loss": 0.6739,
"step": 849
},
{
"epoch": 0.05,
"grad_norm": 1.012834072113037,
"learning_rate": 9.985125252203122e-06,
"loss": 0.6486,
"step": 850
},
{
"epoch": 0.05,
"grad_norm": 0.9515576958656311,
"learning_rate": 9.985046065505141e-06,
"loss": 0.6631,
"step": 851
},
{
"epoch": 0.05,
"grad_norm": 0.9131986498832703,
"learning_rate": 9.984966668904211e-06,
"loss": 0.6183,
"step": 852
},
{
"epoch": 0.05,
"grad_norm": 1.0273786783218384,
"learning_rate": 9.984887062403678e-06,
"loss": 0.6952,
"step": 853
},
{
"epoch": 0.05,
"grad_norm": 1.0616415739059448,
"learning_rate": 9.984807246006891e-06,
"loss": 0.6604,
"step": 854
},
{
"epoch": 0.05,
"grad_norm": 1.0264229774475098,
"learning_rate": 9.984727219717212e-06,
"loss": 0.6836,
"step": 855
},
{
"epoch": 0.05,
"grad_norm": 0.9620240926742554,
"learning_rate": 9.984646983538009e-06,
"loss": 0.6725,
"step": 856
},
{
"epoch": 0.05,
"grad_norm": 1.026843786239624,
"learning_rate": 9.984566537472662e-06,
"loss": 0.6973,
"step": 857
},
{
"epoch": 0.05,
"grad_norm": 0.910517156124115,
"learning_rate": 9.98448588152456e-06,
"loss": 0.6826,
"step": 858
},
{
"epoch": 0.05,
"grad_norm": 0.8792157769203186,
"learning_rate": 9.984405015697097e-06,
"loss": 0.6207,
"step": 859
},
{
"epoch": 0.05,
"grad_norm": 1.026893973350525,
"learning_rate": 9.984323939993678e-06,
"loss": 0.6576,
"step": 860
},
{
"epoch": 0.05,
"grad_norm": 0.9753056168556213,
"learning_rate": 9.984242654417716e-06,
"loss": 0.7123,
"step": 861
},
{
"epoch": 0.05,
"grad_norm": 0.9340549111366272,
"learning_rate": 9.984161158972636e-06,
"loss": 0.6311,
"step": 862
},
{
"epoch": 0.05,
"grad_norm": 0.958814799785614,
"learning_rate": 9.984079453661869e-06,
"loss": 0.6848,
"step": 863
},
{
"epoch": 0.05,
"grad_norm": 1.02495539188385,
"learning_rate": 9.983997538488851e-06,
"loss": 0.6614,
"step": 864
},
{
"epoch": 0.05,
"grad_norm": 0.9181699156761169,
"learning_rate": 9.983915413457036e-06,
"loss": 0.6182,
"step": 865
},
{
"epoch": 0.05,
"grad_norm": 0.9128296375274658,
"learning_rate": 9.983833078569883e-06,
"loss": 0.6696,
"step": 866
},
{
"epoch": 0.05,
"grad_norm": 0.9677926301956177,
"learning_rate": 9.983750533830856e-06,
"loss": 0.7208,
"step": 867
},
{
"epoch": 0.05,
"grad_norm": 1.0382143259048462,
"learning_rate": 9.98366777924343e-06,
"loss": 0.7064,
"step": 868
},
{
"epoch": 0.06,
"grad_norm": 1.0093235969543457,
"learning_rate": 9.983584814811092e-06,
"loss": 0.6637,
"step": 869
},
{
"epoch": 0.06,
"grad_norm": 0.9793532490730286,
"learning_rate": 9.983501640537333e-06,
"loss": 0.7384,
"step": 870
},
{
"epoch": 0.06,
"grad_norm": 0.9115772247314453,
"learning_rate": 9.983418256425656e-06,
"loss": 0.6931,
"step": 871
},
{
"epoch": 0.06,
"grad_norm": 1.007819414138794,
"learning_rate": 9.983334662479572e-06,
"loss": 0.6364,
"step": 872
},
{
"epoch": 0.06,
"grad_norm": 0.9625717997550964,
"learning_rate": 9.983250858702603e-06,
"loss": 0.6219,
"step": 873
},
{
"epoch": 0.06,
"grad_norm": 0.9227074980735779,
"learning_rate": 9.983166845098275e-06,
"loss": 0.6472,
"step": 874
},
{
"epoch": 0.06,
"grad_norm": 1.024123191833496,
"learning_rate": 9.983082621670126e-06,
"loss": 0.6085,
"step": 875
},
{
"epoch": 0.06,
"grad_norm": 0.9824538826942444,
"learning_rate": 9.982998188421702e-06,
"loss": 0.7077,
"step": 876
},
{
"epoch": 0.06,
"grad_norm": 0.9731464982032776,
"learning_rate": 9.98291354535656e-06,
"loss": 0.6783,
"step": 877
},
{
"epoch": 0.06,
"grad_norm": 0.9325253367424011,
"learning_rate": 9.982828692478261e-06,
"loss": 0.6467,
"step": 878
},
{
"epoch": 0.06,
"grad_norm": 1.0967223644256592,
"learning_rate": 9.982743629790382e-06,
"loss": 0.6342,
"step": 879
},
{
"epoch": 0.06,
"grad_norm": 0.9630370140075684,
"learning_rate": 9.982658357296502e-06,
"loss": 0.6359,
"step": 880
},
{
"epoch": 0.06,
"grad_norm": 0.9850766062736511,
"learning_rate": 9.982572875000212e-06,
"loss": 0.6571,
"step": 881
},
{
"epoch": 0.06,
"grad_norm": 1.0202929973602295,
"learning_rate": 9.98248718290511e-06,
"loss": 0.7151,
"step": 882
},
{
"epoch": 0.06,
"grad_norm": 0.9093936085700989,
"learning_rate": 9.982401281014806e-06,
"loss": 0.6123,
"step": 883
},
{
"epoch": 0.06,
"grad_norm": 1.017791748046875,
"learning_rate": 9.982315169332918e-06,
"loss": 0.7209,
"step": 884
},
{
"epoch": 0.06,
"grad_norm": 1.0141305923461914,
"learning_rate": 9.982228847863069e-06,
"loss": 0.6597,
"step": 885
},
{
"epoch": 0.06,
"grad_norm": 0.9242052435874939,
"learning_rate": 9.982142316608897e-06,
"loss": 0.6469,
"step": 886
},
{
"epoch": 0.06,
"grad_norm": 0.9739558100700378,
"learning_rate": 9.982055575574042e-06,
"loss": 0.6735,
"step": 887
},
{
"epoch": 0.06,
"grad_norm": 1.027789831161499,
"learning_rate": 9.981968624762159e-06,
"loss": 0.7039,
"step": 888
},
{
"epoch": 0.06,
"grad_norm": 1.0447962284088135,
"learning_rate": 9.981881464176908e-06,
"loss": 0.6446,
"step": 889
},
{
"epoch": 0.06,
"grad_norm": 1.0435072183609009,
"learning_rate": 9.981794093821957e-06,
"loss": 0.6343,
"step": 890
},
{
"epoch": 0.06,
"grad_norm": 0.9777998924255371,
"learning_rate": 9.981706513700989e-06,
"loss": 0.6912,
"step": 891
},
{
"epoch": 0.06,
"grad_norm": 0.9571147561073303,
"learning_rate": 9.98161872381769e-06,
"loss": 0.6138,
"step": 892
},
{
"epoch": 0.06,
"grad_norm": 0.9109900593757629,
"learning_rate": 9.981530724175756e-06,
"loss": 0.5711,
"step": 893
},
{
"epoch": 0.06,
"grad_norm": 0.95176762342453,
"learning_rate": 9.981442514778892e-06,
"loss": 0.6553,
"step": 894
},
{
"epoch": 0.06,
"grad_norm": 0.890281081199646,
"learning_rate": 9.981354095630816e-06,
"loss": 0.6194,
"step": 895
},
{
"epoch": 0.06,
"grad_norm": 0.9268616437911987,
"learning_rate": 9.981265466735244e-06,
"loss": 0.6303,
"step": 896
},
{
"epoch": 0.06,
"grad_norm": 0.961388885974884,
"learning_rate": 9.981176628095913e-06,
"loss": 0.6764,
"step": 897
},
{
"epoch": 0.06,
"grad_norm": 1.0211546421051025,
"learning_rate": 9.981087579716564e-06,
"loss": 0.6545,
"step": 898
},
{
"epoch": 0.06,
"grad_norm": 0.9296051859855652,
"learning_rate": 9.980998321600944e-06,
"loss": 0.613,
"step": 899
},
{
"epoch": 0.06,
"grad_norm": 0.9536461234092712,
"learning_rate": 9.98090885375281e-06,
"loss": 0.7281,
"step": 900
},
{
"epoch": 0.06,
"grad_norm": 0.9516177773475647,
"learning_rate": 9.980819176175932e-06,
"loss": 0.6636,
"step": 901
},
{
"epoch": 0.06,
"grad_norm": 0.9591559767723083,
"learning_rate": 9.980729288874088e-06,
"loss": 0.6949,
"step": 902
},
{
"epoch": 0.06,
"grad_norm": 1.0467829704284668,
"learning_rate": 9.98063919185106e-06,
"loss": 0.6268,
"step": 903
},
{
"epoch": 0.06,
"grad_norm": 1.0166997909545898,
"learning_rate": 9.980548885110641e-06,
"loss": 0.6232,
"step": 904
},
{
"epoch": 0.06,
"grad_norm": 0.9611027240753174,
"learning_rate": 9.980458368656635e-06,
"loss": 0.7333,
"step": 905
},
{
"epoch": 0.06,
"grad_norm": 0.9568818807601929,
"learning_rate": 9.980367642492852e-06,
"loss": 0.7402,
"step": 906
},
{
"epoch": 0.06,
"grad_norm": 0.9771458506584167,
"learning_rate": 9.980276706623114e-06,
"loss": 0.6486,
"step": 907
},
{
"epoch": 0.06,
"grad_norm": 0.8996546268463135,
"learning_rate": 9.98018556105125e-06,
"loss": 0.6198,
"step": 908
},
{
"epoch": 0.06,
"grad_norm": 1.0799936056137085,
"learning_rate": 9.980094205781094e-06,
"loss": 0.6487,
"step": 909
},
{
"epoch": 0.06,
"grad_norm": 0.9570757150650024,
"learning_rate": 9.980002640816498e-06,
"loss": 0.6478,
"step": 910
},
{
"epoch": 0.06,
"grad_norm": 0.9597965478897095,
"learning_rate": 9.979910866161313e-06,
"loss": 0.7147,
"step": 911
},
{
"epoch": 0.06,
"grad_norm": 0.9495083689689636,
"learning_rate": 9.979818881819408e-06,
"loss": 0.6683,
"step": 912
},
{
"epoch": 0.06,
"grad_norm": 0.9899744987487793,
"learning_rate": 9.979726687794651e-06,
"loss": 0.752,
"step": 913
},
{
"epoch": 0.06,
"grad_norm": 0.9405885934829712,
"learning_rate": 9.979634284090927e-06,
"loss": 0.6088,
"step": 914
},
{
"epoch": 0.06,
"grad_norm": 0.9481942057609558,
"learning_rate": 9.979541670712125e-06,
"loss": 0.706,
"step": 915
},
{
"epoch": 0.06,
"grad_norm": 1.053983211517334,
"learning_rate": 9.979448847662148e-06,
"loss": 0.6401,
"step": 916
},
{
"epoch": 0.06,
"grad_norm": 0.963388204574585,
"learning_rate": 9.979355814944901e-06,
"loss": 0.7043,
"step": 917
},
{
"epoch": 0.06,
"grad_norm": 1.0153307914733887,
"learning_rate": 9.979262572564303e-06,
"loss": 0.62,
"step": 918
},
{
"epoch": 0.06,
"grad_norm": 0.9814804196357727,
"learning_rate": 9.979169120524279e-06,
"loss": 0.637,
"step": 919
},
{
"epoch": 0.06,
"grad_norm": 0.9698815941810608,
"learning_rate": 9.979075458828765e-06,
"loss": 0.6741,
"step": 920
},
{
"epoch": 0.06,
"grad_norm": 0.9134169816970825,
"learning_rate": 9.978981587481705e-06,
"loss": 0.6723,
"step": 921
},
{
"epoch": 0.06,
"grad_norm": 1.1175658702850342,
"learning_rate": 9.978887506487049e-06,
"loss": 0.744,
"step": 922
},
{
"epoch": 0.06,
"grad_norm": 0.9176881909370422,
"learning_rate": 9.978793215848763e-06,
"loss": 0.6377,
"step": 923
},
{
"epoch": 0.06,
"grad_norm": 1.0039703845977783,
"learning_rate": 9.978698715570814e-06,
"loss": 0.6938,
"step": 924
},
{
"epoch": 0.06,
"grad_norm": 0.969764232635498,
"learning_rate": 9.97860400565718e-06,
"loss": 0.6361,
"step": 925
},
{
"epoch": 0.06,
"grad_norm": 0.9281071424484253,
"learning_rate": 9.978509086111852e-06,
"loss": 0.6727,
"step": 926
},
{
"epoch": 0.06,
"grad_norm": 0.9164725542068481,
"learning_rate": 9.978413956938824e-06,
"loss": 0.6378,
"step": 927
},
{
"epoch": 0.06,
"grad_norm": 0.9597803354263306,
"learning_rate": 9.978318618142104e-06,
"loss": 0.6448,
"step": 928
},
{
"epoch": 0.06,
"grad_norm": 0.9781709313392639,
"learning_rate": 9.978223069725706e-06,
"loss": 0.626,
"step": 929
},
{
"epoch": 0.06,
"grad_norm": 0.9814243912696838,
"learning_rate": 9.978127311693653e-06,
"loss": 0.5937,
"step": 930
},
{
"epoch": 0.06,
"grad_norm": 0.9959214925765991,
"learning_rate": 9.978031344049975e-06,
"loss": 0.6537,
"step": 931
},
{
"epoch": 0.06,
"grad_norm": 1.018471121788025,
"learning_rate": 9.977935166798714e-06,
"loss": 0.6903,
"step": 932
},
{
"epoch": 0.06,
"grad_norm": 0.9168115854263306,
"learning_rate": 9.977838779943921e-06,
"loss": 0.6261,
"step": 933
},
{
"epoch": 0.06,
"grad_norm": 1.0428775548934937,
"learning_rate": 9.977742183489653e-06,
"loss": 0.7146,
"step": 934
},
{
"epoch": 0.06,
"grad_norm": 0.9918137192726135,
"learning_rate": 9.977645377439979e-06,
"loss": 0.6469,
"step": 935
},
{
"epoch": 0.06,
"grad_norm": 0.9870752096176147,
"learning_rate": 9.977548361798975e-06,
"loss": 0.6586,
"step": 936
},
{
"epoch": 0.06,
"grad_norm": 0.9916146993637085,
"learning_rate": 9.977451136570722e-06,
"loss": 0.6951,
"step": 937
},
{
"epoch": 0.06,
"grad_norm": 0.894088089466095,
"learning_rate": 9.97735370175932e-06,
"loss": 0.6327,
"step": 938
},
{
"epoch": 0.06,
"grad_norm": 0.9738256335258484,
"learning_rate": 9.977256057368865e-06,
"loss": 0.6731,
"step": 939
},
{
"epoch": 0.06,
"grad_norm": 0.9997884631156921,
"learning_rate": 9.977158203403475e-06,
"loss": 0.6635,
"step": 940
},
{
"epoch": 0.06,
"grad_norm": 0.9893252849578857,
"learning_rate": 9.977060139867268e-06,
"loss": 0.6278,
"step": 941
},
{
"epoch": 0.06,
"grad_norm": 0.8766104578971863,
"learning_rate": 9.97696186676437e-06,
"loss": 0.5792,
"step": 942
},
{
"epoch": 0.06,
"grad_norm": 0.9894992113113403,
"learning_rate": 9.976863384098923e-06,
"loss": 0.6855,
"step": 943
},
{
"epoch": 0.06,
"grad_norm": 1.0352369546890259,
"learning_rate": 9.976764691875072e-06,
"loss": 0.7075,
"step": 944
},
{
"epoch": 0.06,
"grad_norm": 0.996104896068573,
"learning_rate": 9.976665790096971e-06,
"loss": 0.6464,
"step": 945
},
{
"epoch": 0.06,
"grad_norm": 0.9106736779212952,
"learning_rate": 9.976566678768787e-06,
"loss": 0.5984,
"step": 946
},
{
"epoch": 0.06,
"grad_norm": 0.9899172186851501,
"learning_rate": 9.976467357894693e-06,
"loss": 0.6433,
"step": 947
},
{
"epoch": 0.06,
"grad_norm": 0.9473981261253357,
"learning_rate": 9.97636782747887e-06,
"loss": 0.7038,
"step": 948
},
{
"epoch": 0.06,
"grad_norm": 1.0071048736572266,
"learning_rate": 9.976268087525509e-06,
"loss": 0.7246,
"step": 949
},
{
"epoch": 0.06,
"grad_norm": 0.9809601902961731,
"learning_rate": 9.976168138038812e-06,
"loss": 0.6786,
"step": 950
},
{
"epoch": 0.06,
"grad_norm": 0.9237947463989258,
"learning_rate": 9.976067979022983e-06,
"loss": 0.685,
"step": 951
},
{
"epoch": 0.06,
"grad_norm": 0.9583842754364014,
"learning_rate": 9.975967610482243e-06,
"loss": 0.6639,
"step": 952
},
{
"epoch": 0.06,
"grad_norm": 0.9147073030471802,
"learning_rate": 9.975867032420816e-06,
"loss": 0.6512,
"step": 953
},
{
"epoch": 0.06,
"grad_norm": 0.9555047750473022,
"learning_rate": 9.97576624484294e-06,
"loss": 0.7145,
"step": 954
},
{
"epoch": 0.06,
"grad_norm": 0.9743271470069885,
"learning_rate": 9.975665247752855e-06,
"loss": 0.6743,
"step": 955
},
{
"epoch": 0.06,
"grad_norm": 0.9561975598335266,
"learning_rate": 9.975564041154817e-06,
"loss": 0.6491,
"step": 956
},
{
"epoch": 0.06,
"grad_norm": 0.971872091293335,
"learning_rate": 9.975462625053085e-06,
"loss": 0.696,
"step": 957
},
{
"epoch": 0.06,
"grad_norm": 0.9306091666221619,
"learning_rate": 9.97536099945193e-06,
"loss": 0.6438,
"step": 958
},
{
"epoch": 0.06,
"grad_norm": 0.9069042205810547,
"learning_rate": 9.975259164355632e-06,
"loss": 0.6829,
"step": 959
},
{
"epoch": 0.06,
"grad_norm": 1.0041426420211792,
"learning_rate": 9.97515711976848e-06,
"loss": 0.6783,
"step": 960
},
{
"epoch": 0.06,
"grad_norm": 1.1071757078170776,
"learning_rate": 9.975054865694767e-06,
"loss": 0.6785,
"step": 961
},
{
"epoch": 0.06,
"grad_norm": 0.9503558278083801,
"learning_rate": 9.9749524021388e-06,
"loss": 0.7091,
"step": 962
},
{
"epoch": 0.06,
"grad_norm": 0.9102316498756409,
"learning_rate": 9.974849729104894e-06,
"loss": 0.7051,
"step": 963
},
{
"epoch": 0.06,
"grad_norm": 1.003288984298706,
"learning_rate": 9.974746846597373e-06,
"loss": 0.7456,
"step": 964
},
{
"epoch": 0.06,
"grad_norm": 0.9375484585762024,
"learning_rate": 9.974643754620567e-06,
"loss": 0.6835,
"step": 965
},
{
"epoch": 0.06,
"grad_norm": 1.0092264413833618,
"learning_rate": 9.97454045317882e-06,
"loss": 0.672,
"step": 966
},
{
"epoch": 0.06,
"grad_norm": 0.9897353053092957,
"learning_rate": 9.974436942276477e-06,
"loss": 0.6498,
"step": 967
},
{
"epoch": 0.06,
"grad_norm": 0.9781389236450195,
"learning_rate": 9.974333221917903e-06,
"loss": 0.6988,
"step": 968
},
{
"epoch": 0.06,
"grad_norm": 0.8853163719177246,
"learning_rate": 9.974229292107458e-06,
"loss": 0.6391,
"step": 969
},
{
"epoch": 0.06,
"grad_norm": 1.3123575448989868,
"learning_rate": 9.974125152849523e-06,
"loss": 0.689,
"step": 970
},
{
"epoch": 0.06,
"grad_norm": 0.9711446166038513,
"learning_rate": 9.974020804148482e-06,
"loss": 0.645,
"step": 971
},
{
"epoch": 0.06,
"grad_norm": 0.9429543614387512,
"learning_rate": 9.973916246008727e-06,
"loss": 0.6561,
"step": 972
},
{
"epoch": 0.06,
"grad_norm": 0.9982245564460754,
"learning_rate": 9.973811478434662e-06,
"loss": 0.6915,
"step": 973
},
{
"epoch": 0.06,
"grad_norm": 0.9405156373977661,
"learning_rate": 9.9737065014307e-06,
"loss": 0.6852,
"step": 974
},
{
"epoch": 0.06,
"grad_norm": 0.9267737865447998,
"learning_rate": 9.973601315001258e-06,
"loss": 0.6724,
"step": 975
},
{
"epoch": 0.06,
"grad_norm": 0.9884217977523804,
"learning_rate": 9.973495919150766e-06,
"loss": 0.6892,
"step": 976
},
{
"epoch": 0.06,
"grad_norm": 0.8952025771141052,
"learning_rate": 9.973390313883664e-06,
"loss": 0.6149,
"step": 977
},
{
"epoch": 0.06,
"grad_norm": 0.916035532951355,
"learning_rate": 9.973284499204396e-06,
"loss": 0.6147,
"step": 978
},
{
"epoch": 0.06,
"grad_norm": 0.9775811433792114,
"learning_rate": 9.973178475117419e-06,
"loss": 0.6582,
"step": 979
},
{
"epoch": 0.06,
"grad_norm": 0.942755401134491,
"learning_rate": 9.973072241627196e-06,
"loss": 0.7021,
"step": 980
},
{
"epoch": 0.06,
"grad_norm": 1.0862394571304321,
"learning_rate": 9.972965798738202e-06,
"loss": 0.7022,
"step": 981
},
{
"epoch": 0.06,
"grad_norm": 0.8991437554359436,
"learning_rate": 9.972859146454917e-06,
"loss": 0.6008,
"step": 982
},
{
"epoch": 0.06,
"grad_norm": 0.9662036895751953,
"learning_rate": 9.972752284781832e-06,
"loss": 0.6783,
"step": 983
},
{
"epoch": 0.06,
"grad_norm": 0.9464378952980042,
"learning_rate": 9.97264521372345e-06,
"loss": 0.6452,
"step": 984
},
{
"epoch": 0.06,
"grad_norm": 0.9118345975875854,
"learning_rate": 9.972537933284274e-06,
"loss": 0.6253,
"step": 985
},
{
"epoch": 0.06,
"grad_norm": 0.9645686745643616,
"learning_rate": 9.972430443468826e-06,
"loss": 0.6812,
"step": 986
},
{
"epoch": 0.06,
"grad_norm": 0.8795304894447327,
"learning_rate": 9.972322744281628e-06,
"loss": 0.5986,
"step": 987
},
{
"epoch": 0.06,
"grad_norm": 0.9460269808769226,
"learning_rate": 9.972214835727218e-06,
"loss": 0.6976,
"step": 988
},
{
"epoch": 0.06,
"grad_norm": 0.9464765191078186,
"learning_rate": 9.972106717810137e-06,
"loss": 0.6538,
"step": 989
},
{
"epoch": 0.06,
"grad_norm": 0.9307100176811218,
"learning_rate": 9.97199839053494e-06,
"loss": 0.7183,
"step": 990
},
{
"epoch": 0.06,
"grad_norm": 0.978036105632782,
"learning_rate": 9.971889853906186e-06,
"loss": 0.6879,
"step": 991
},
{
"epoch": 0.06,
"grad_norm": 0.9477901458740234,
"learning_rate": 9.971781107928447e-06,
"loss": 0.6093,
"step": 992
},
{
"epoch": 0.06,
"grad_norm": 0.9999047517776489,
"learning_rate": 9.9716721526063e-06,
"loss": 0.6431,
"step": 993
},
{
"epoch": 0.06,
"grad_norm": 0.9746558666229248,
"learning_rate": 9.971562987944336e-06,
"loss": 0.6916,
"step": 994
},
{
"epoch": 0.06,
"grad_norm": 0.9560214281082153,
"learning_rate": 9.971453613947147e-06,
"loss": 0.6746,
"step": 995
},
{
"epoch": 0.06,
"grad_norm": 1.0287420749664307,
"learning_rate": 9.971344030619342e-06,
"loss": 0.6463,
"step": 996
},
{
"epoch": 0.06,
"grad_norm": 0.9547033309936523,
"learning_rate": 9.971234237965534e-06,
"loss": 0.6567,
"step": 997
},
{
"epoch": 0.06,
"grad_norm": 0.9555925130844116,
"learning_rate": 9.971124235990346e-06,
"loss": 0.6834,
"step": 998
},
{
"epoch": 0.06,
"grad_norm": 0.9444142580032349,
"learning_rate": 9.971014024698408e-06,
"loss": 0.6531,
"step": 999
},
{
"epoch": 0.06,
"grad_norm": 1.0372717380523682,
"learning_rate": 9.970903604094365e-06,
"loss": 0.6779,
"step": 1000
},
{
"epoch": 0.06,
"grad_norm": 0.9465329051017761,
"learning_rate": 9.970792974182863e-06,
"loss": 0.6849,
"step": 1001
},
{
"epoch": 0.06,
"grad_norm": 0.9567575454711914,
"learning_rate": 9.97068213496856e-06,
"loss": 0.6118,
"step": 1002
},
{
"epoch": 0.06,
"grad_norm": 0.9264045357704163,
"learning_rate": 9.970571086456124e-06,
"loss": 0.6617,
"step": 1003
},
{
"epoch": 0.06,
"grad_norm": 0.9825202226638794,
"learning_rate": 9.970459828650232e-06,
"loss": 0.6393,
"step": 1004
},
{
"epoch": 0.06,
"grad_norm": 0.9990862011909485,
"learning_rate": 9.970348361555566e-06,
"loss": 0.633,
"step": 1005
},
{
"epoch": 0.06,
"grad_norm": 0.9339586496353149,
"learning_rate": 9.970236685176821e-06,
"loss": 0.6845,
"step": 1006
},
{
"epoch": 0.06,
"grad_norm": 0.9837610125541687,
"learning_rate": 9.9701247995187e-06,
"loss": 0.6371,
"step": 1007
},
{
"epoch": 0.06,
"grad_norm": 1.0499521493911743,
"learning_rate": 9.970012704585916e-06,
"loss": 0.6754,
"step": 1008
},
{
"epoch": 0.06,
"grad_norm": 1.0463135242462158,
"learning_rate": 9.969900400383183e-06,
"loss": 0.6943,
"step": 1009
},
{
"epoch": 0.06,
"grad_norm": 0.9164577126502991,
"learning_rate": 9.969787886915236e-06,
"loss": 0.6393,
"step": 1010
},
{
"epoch": 0.06,
"grad_norm": 0.9350469708442688,
"learning_rate": 9.969675164186807e-06,
"loss": 0.6697,
"step": 1011
},
{
"epoch": 0.06,
"grad_norm": 0.9471501111984253,
"learning_rate": 9.969562232202647e-06,
"loss": 0.6617,
"step": 1012
},
{
"epoch": 0.06,
"grad_norm": 0.9739314913749695,
"learning_rate": 9.969449090967509e-06,
"loss": 0.6864,
"step": 1013
},
{
"epoch": 0.06,
"grad_norm": 0.9026748538017273,
"learning_rate": 9.969335740486157e-06,
"loss": 0.5679,
"step": 1014
},
{
"epoch": 0.06,
"grad_norm": 0.9329193830490112,
"learning_rate": 9.969222180763363e-06,
"loss": 0.6714,
"step": 1015
},
{
"epoch": 0.06,
"grad_norm": 1.0058557987213135,
"learning_rate": 9.96910841180391e-06,
"loss": 0.6131,
"step": 1016
},
{
"epoch": 0.06,
"grad_norm": 1.0142805576324463,
"learning_rate": 9.968994433612589e-06,
"loss": 0.7093,
"step": 1017
},
{
"epoch": 0.06,
"grad_norm": 0.9591155648231506,
"learning_rate": 9.968880246194198e-06,
"loss": 0.6828,
"step": 1018
},
{
"epoch": 0.06,
"grad_norm": 0.9221545457839966,
"learning_rate": 9.968765849553544e-06,
"loss": 0.6824,
"step": 1019
},
{
"epoch": 0.06,
"grad_norm": 0.8757246136665344,
"learning_rate": 9.968651243695446e-06,
"loss": 0.6282,
"step": 1020
},
{
"epoch": 0.06,
"grad_norm": 1.0177953243255615,
"learning_rate": 9.968536428624729e-06,
"loss": 0.6673,
"step": 1021
},
{
"epoch": 0.06,
"grad_norm": 0.9448785781860352,
"learning_rate": 9.968421404346228e-06,
"loss": 0.6768,
"step": 1022
},
{
"epoch": 0.06,
"grad_norm": 1.0076022148132324,
"learning_rate": 9.968306170864786e-06,
"loss": 0.7581,
"step": 1023
},
{
"epoch": 0.06,
"grad_norm": 1.0110529661178589,
"learning_rate": 9.968190728185251e-06,
"loss": 0.6518,
"step": 1024
},
{
"epoch": 0.06,
"grad_norm": 0.8694904446601868,
"learning_rate": 9.968075076312492e-06,
"loss": 0.6453,
"step": 1025
},
{
"epoch": 0.07,
"grad_norm": 0.9269656538963318,
"learning_rate": 9.96795921525137e-06,
"loss": 0.6668,
"step": 1026
},
{
"epoch": 0.07,
"grad_norm": 0.9647197723388672,
"learning_rate": 9.967843145006771e-06,
"loss": 0.6586,
"step": 1027
},
{
"epoch": 0.07,
"grad_norm": 1.0203245878219604,
"learning_rate": 9.967726865583578e-06,
"loss": 0.6558,
"step": 1028
},
{
"epoch": 0.07,
"grad_norm": 0.9874720573425293,
"learning_rate": 9.967610376986687e-06,
"loss": 0.651,
"step": 1029
},
{
"epoch": 0.07,
"grad_norm": 1.0147123336791992,
"learning_rate": 9.967493679221006e-06,
"loss": 0.7248,
"step": 1030
},
{
"epoch": 0.07,
"grad_norm": 0.9211717844009399,
"learning_rate": 9.967376772291446e-06,
"loss": 0.6315,
"step": 1031
},
{
"epoch": 0.07,
"grad_norm": 0.9259415864944458,
"learning_rate": 9.96725965620293e-06,
"loss": 0.6483,
"step": 1032
},
{
"epoch": 0.07,
"grad_norm": 0.9673779010772705,
"learning_rate": 9.96714233096039e-06,
"loss": 0.648,
"step": 1033
},
{
"epoch": 0.07,
"grad_norm": 0.9705022573471069,
"learning_rate": 9.967024796568766e-06,
"loss": 0.7079,
"step": 1034
},
{
"epoch": 0.07,
"grad_norm": 0.9708682894706726,
"learning_rate": 9.966907053033006e-06,
"loss": 0.6949,
"step": 1035
},
{
"epoch": 0.07,
"grad_norm": 0.9706398844718933,
"learning_rate": 9.96678910035807e-06,
"loss": 0.6657,
"step": 1036
},
{
"epoch": 0.07,
"grad_norm": 0.9681613445281982,
"learning_rate": 9.966670938548923e-06,
"loss": 0.6909,
"step": 1037
},
{
"epoch": 0.07,
"grad_norm": 1.0291481018066406,
"learning_rate": 9.96655256761054e-06,
"loss": 0.644,
"step": 1038
},
{
"epoch": 0.07,
"grad_norm": 0.9305548667907715,
"learning_rate": 9.966433987547906e-06,
"loss": 0.6166,
"step": 1039
},
{
"epoch": 0.07,
"grad_norm": 0.9347004890441895,
"learning_rate": 9.966315198366011e-06,
"loss": 0.6963,
"step": 1040
},
{
"epoch": 0.07,
"grad_norm": 1.0041406154632568,
"learning_rate": 9.966196200069863e-06,
"loss": 0.6962,
"step": 1041
},
{
"epoch": 0.07,
"grad_norm": 1.0330625772476196,
"learning_rate": 9.966076992664469e-06,
"loss": 0.6894,
"step": 1042
},
{
"epoch": 0.07,
"grad_norm": 1.0031425952911377,
"learning_rate": 9.965957576154848e-06,
"loss": 0.6555,
"step": 1043
},
{
"epoch": 0.07,
"grad_norm": 1.005267858505249,
"learning_rate": 9.96583795054603e-06,
"loss": 0.6799,
"step": 1044
},
{
"epoch": 0.07,
"grad_norm": 0.9837890267372131,
"learning_rate": 9.965718115843048e-06,
"loss": 0.6881,
"step": 1045
},
{
"epoch": 0.07,
"grad_norm": 0.983871340751648,
"learning_rate": 9.965598072050953e-06,
"loss": 0.6925,
"step": 1046
},
{
"epoch": 0.07,
"grad_norm": 0.9536935687065125,
"learning_rate": 9.965477819174796e-06,
"loss": 0.6616,
"step": 1047
},
{
"epoch": 0.07,
"grad_norm": 0.9697441458702087,
"learning_rate": 9.96535735721964e-06,
"loss": 0.6473,
"step": 1048
},
{
"epoch": 0.07,
"grad_norm": 0.9311846494674683,
"learning_rate": 9.965236686190563e-06,
"loss": 0.734,
"step": 1049
},
{
"epoch": 0.07,
"grad_norm": 0.9542213678359985,
"learning_rate": 9.965115806092638e-06,
"loss": 0.6183,
"step": 1050
},
{
"epoch": 0.07,
"grad_norm": 0.940539538860321,
"learning_rate": 9.96499471693096e-06,
"loss": 0.64,
"step": 1051
},
{
"epoch": 0.07,
"grad_norm": 0.9398403763771057,
"learning_rate": 9.964873418710628e-06,
"loss": 0.573,
"step": 1052
},
{
"epoch": 0.07,
"grad_norm": 0.9776535034179688,
"learning_rate": 9.964751911436748e-06,
"loss": 0.6572,
"step": 1053
},
{
"epoch": 0.07,
"grad_norm": 0.9758483171463013,
"learning_rate": 9.964630195114432e-06,
"loss": 0.6424,
"step": 1054
},
{
"epoch": 0.07,
"grad_norm": 0.9551203846931458,
"learning_rate": 9.964508269748814e-06,
"loss": 0.6773,
"step": 1055
},
{
"epoch": 0.07,
"grad_norm": 0.953066885471344,
"learning_rate": 9.96438613534502e-06,
"loss": 0.6551,
"step": 1056
},
{
"epoch": 0.07,
"grad_norm": 1.0139051675796509,
"learning_rate": 9.964263791908198e-06,
"loss": 0.654,
"step": 1057
},
{
"epoch": 0.07,
"grad_norm": 0.9728212952613831,
"learning_rate": 9.964141239443497e-06,
"loss": 0.612,
"step": 1058
},
{
"epoch": 0.07,
"grad_norm": 0.9843549132347107,
"learning_rate": 9.964018477956075e-06,
"loss": 0.6867,
"step": 1059
},
{
"epoch": 0.07,
"grad_norm": 0.9203978776931763,
"learning_rate": 9.963895507451104e-06,
"loss": 0.6567,
"step": 1060
},
{
"epoch": 0.07,
"grad_norm": 0.9760221242904663,
"learning_rate": 9.963772327933764e-06,
"loss": 0.6684,
"step": 1061
},
{
"epoch": 0.07,
"grad_norm": 0.9449279308319092,
"learning_rate": 9.963648939409236e-06,
"loss": 0.6811,
"step": 1062
},
{
"epoch": 0.07,
"grad_norm": 0.8834384083747864,
"learning_rate": 9.96352534188272e-06,
"loss": 0.6195,
"step": 1063
},
{
"epoch": 0.07,
"grad_norm": 0.9562472701072693,
"learning_rate": 9.963401535359418e-06,
"loss": 0.6353,
"step": 1064
},
{
"epoch": 0.07,
"grad_norm": 0.9446646571159363,
"learning_rate": 9.963277519844544e-06,
"loss": 0.617,
"step": 1065
},
{
"epoch": 0.07,
"grad_norm": 1.0598340034484863,
"learning_rate": 9.963153295343319e-06,
"loss": 0.7194,
"step": 1066
},
{
"epoch": 0.07,
"grad_norm": 0.9084300994873047,
"learning_rate": 9.963028861860975e-06,
"loss": 0.6881,
"step": 1067
},
{
"epoch": 0.07,
"grad_norm": 0.9703001976013184,
"learning_rate": 9.962904219402752e-06,
"loss": 0.7235,
"step": 1068
},
{
"epoch": 0.07,
"grad_norm": 0.9932600259780884,
"learning_rate": 9.962779367973896e-06,
"loss": 0.6662,
"step": 1069
},
{
"epoch": 0.07,
"grad_norm": 0.9670122265815735,
"learning_rate": 9.962654307579665e-06,
"loss": 0.6604,
"step": 1070
},
{
"epoch": 0.07,
"grad_norm": 0.9765552282333374,
"learning_rate": 9.962529038225324e-06,
"loss": 0.7055,
"step": 1071
},
{
"epoch": 0.07,
"grad_norm": 0.9492608904838562,
"learning_rate": 9.962403559916149e-06,
"loss": 0.6433,
"step": 1072
},
{
"epoch": 0.07,
"grad_norm": 0.9491517543792725,
"learning_rate": 9.962277872657422e-06,
"loss": 0.6836,
"step": 1073
},
{
"epoch": 0.07,
"grad_norm": 0.950981616973877,
"learning_rate": 9.962151976454439e-06,
"loss": 0.6404,
"step": 1074
},
{
"epoch": 0.07,
"grad_norm": 1.0310513973236084,
"learning_rate": 9.962025871312497e-06,
"loss": 0.7125,
"step": 1075
},
{
"epoch": 0.07,
"grad_norm": 0.9882835149765015,
"learning_rate": 9.961899557236907e-06,
"loss": 0.6861,
"step": 1076
},
{
"epoch": 0.07,
"grad_norm": 0.9338645935058594,
"learning_rate": 9.961773034232987e-06,
"loss": 0.6555,
"step": 1077
},
{
"epoch": 0.07,
"grad_norm": 0.9319179058074951,
"learning_rate": 9.961646302306066e-06,
"loss": 0.6467,
"step": 1078
},
{
"epoch": 0.07,
"grad_norm": 0.9263783097267151,
"learning_rate": 9.961519361461481e-06,
"loss": 0.6384,
"step": 1079
},
{
"epoch": 0.07,
"grad_norm": 0.9706323742866516,
"learning_rate": 9.961392211704573e-06,
"loss": 0.6704,
"step": 1080
},
{
"epoch": 0.07,
"grad_norm": 0.9975467920303345,
"learning_rate": 9.9612648530407e-06,
"loss": 0.6444,
"step": 1081
},
{
"epoch": 0.07,
"grad_norm": 0.9272286295890808,
"learning_rate": 9.961137285475223e-06,
"loss": 0.6558,
"step": 1082
},
{
"epoch": 0.07,
"grad_norm": 0.9486019611358643,
"learning_rate": 9.961009509013512e-06,
"loss": 0.677,
"step": 1083
},
{
"epoch": 0.07,
"grad_norm": 0.8935267329216003,
"learning_rate": 9.96088152366095e-06,
"loss": 0.6781,
"step": 1084
},
{
"epoch": 0.07,
"grad_norm": 0.9707071781158447,
"learning_rate": 9.960753329422925e-06,
"loss": 0.6578,
"step": 1085
},
{
"epoch": 0.07,
"grad_norm": 0.9859130382537842,
"learning_rate": 9.960624926304834e-06,
"loss": 0.6421,
"step": 1086
},
{
"epoch": 0.07,
"grad_norm": 0.9117816090583801,
"learning_rate": 9.960496314312085e-06,
"loss": 0.6667,
"step": 1087
},
{
"epoch": 0.07,
"grad_norm": 0.9339293837547302,
"learning_rate": 9.96036749345009e-06,
"loss": 0.6645,
"step": 1088
},
{
"epoch": 0.07,
"grad_norm": 0.9403766393661499,
"learning_rate": 9.960238463724278e-06,
"loss": 0.6389,
"step": 1089
},
{
"epoch": 0.07,
"grad_norm": 0.9280768632888794,
"learning_rate": 9.96010922514008e-06,
"loss": 0.6599,
"step": 1090
},
{
"epoch": 0.07,
"grad_norm": 0.9073335528373718,
"learning_rate": 9.959979777702935e-06,
"loss": 0.6426,
"step": 1091
},
{
"epoch": 0.07,
"grad_norm": 0.8961593508720398,
"learning_rate": 9.959850121418298e-06,
"loss": 0.6284,
"step": 1092
},
{
"epoch": 0.07,
"grad_norm": 0.9164572954177856,
"learning_rate": 9.959720256291626e-06,
"loss": 0.6326,
"step": 1093
},
{
"epoch": 0.07,
"grad_norm": 0.9504919052124023,
"learning_rate": 9.959590182328387e-06,
"loss": 0.6923,
"step": 1094
},
{
"epoch": 0.07,
"grad_norm": 0.9429534077644348,
"learning_rate": 9.95945989953406e-06,
"loss": 0.6815,
"step": 1095
},
{
"epoch": 0.07,
"grad_norm": 0.9838384389877319,
"learning_rate": 9.959329407914129e-06,
"loss": 0.6425,
"step": 1096
},
{
"epoch": 0.07,
"grad_norm": 0.9929714798927307,
"learning_rate": 9.959198707474087e-06,
"loss": 0.6323,
"step": 1097
},
{
"epoch": 0.07,
"grad_norm": 0.9384214878082275,
"learning_rate": 9.959067798219442e-06,
"loss": 0.6735,
"step": 1098
},
{
"epoch": 0.07,
"grad_norm": 0.9922656416893005,
"learning_rate": 9.958936680155702e-06,
"loss": 0.6697,
"step": 1099
},
{
"epoch": 0.07,
"grad_norm": 0.9068803787231445,
"learning_rate": 9.958805353288388e-06,
"loss": 0.6207,
"step": 1100
},
{
"epoch": 0.07,
"grad_norm": 0.9993635416030884,
"learning_rate": 9.958673817623033e-06,
"loss": 0.6229,
"step": 1101
},
{
"epoch": 0.07,
"grad_norm": 1.0061472654342651,
"learning_rate": 9.958542073165172e-06,
"loss": 0.6769,
"step": 1102
},
{
"epoch": 0.07,
"grad_norm": 0.9775166511535645,
"learning_rate": 9.958410119920355e-06,
"loss": 0.6552,
"step": 1103
},
{
"epoch": 0.07,
"grad_norm": 0.8982160091400146,
"learning_rate": 9.958277957894137e-06,
"loss": 0.635,
"step": 1104
},
{
"epoch": 0.07,
"grad_norm": 0.9272829294204712,
"learning_rate": 9.958145587092082e-06,
"loss": 0.586,
"step": 1105
},
{
"epoch": 0.07,
"grad_norm": 0.9862303137779236,
"learning_rate": 9.958013007519764e-06,
"loss": 0.7076,
"step": 1106
},
{
"epoch": 0.07,
"grad_norm": 0.8938865661621094,
"learning_rate": 9.957880219182767e-06,
"loss": 0.5773,
"step": 1107
},
{
"epoch": 0.07,
"grad_norm": 0.9800034761428833,
"learning_rate": 9.957747222086682e-06,
"loss": 0.7099,
"step": 1108
},
{
"epoch": 0.07,
"grad_norm": 1.0491564273834229,
"learning_rate": 9.957614016237106e-06,
"loss": 0.6926,
"step": 1109
},
{
"epoch": 0.07,
"grad_norm": 0.930266261100769,
"learning_rate": 9.957480601639652e-06,
"loss": 0.7086,
"step": 1110
},
{
"epoch": 0.07,
"grad_norm": 0.9674487113952637,
"learning_rate": 9.957346978299935e-06,
"loss": 0.6541,
"step": 1111
},
{
"epoch": 0.07,
"grad_norm": 1.0485286712646484,
"learning_rate": 9.957213146223581e-06,
"loss": 0.6749,
"step": 1112
},
{
"epoch": 0.07,
"grad_norm": 0.913090169429779,
"learning_rate": 9.957079105416228e-06,
"loss": 0.648,
"step": 1113
},
{
"epoch": 0.07,
"grad_norm": 0.9890965819358826,
"learning_rate": 9.956944855883516e-06,
"loss": 0.6896,
"step": 1114
},
{
"epoch": 0.07,
"grad_norm": 0.9297420978546143,
"learning_rate": 9.956810397631103e-06,
"loss": 0.7024,
"step": 1115
},
{
"epoch": 0.07,
"grad_norm": 1.0065919160842896,
"learning_rate": 9.956675730664646e-06,
"loss": 0.6224,
"step": 1116
},
{
"epoch": 0.07,
"grad_norm": 0.992179274559021,
"learning_rate": 9.956540854989817e-06,
"loss": 0.6667,
"step": 1117
},
{
"epoch": 0.07,
"grad_norm": 0.9736528992652893,
"learning_rate": 9.956405770612295e-06,
"loss": 0.6669,
"step": 1118
},
{
"epoch": 0.07,
"grad_norm": 0.9248270988464355,
"learning_rate": 9.956270477537768e-06,
"loss": 0.6868,
"step": 1119
},
{
"epoch": 0.07,
"grad_norm": 0.9713013172149658,
"learning_rate": 9.956134975771934e-06,
"loss": 0.6949,
"step": 1120
},
{
"epoch": 0.07,
"grad_norm": 1.0682833194732666,
"learning_rate": 9.955999265320495e-06,
"loss": 0.7197,
"step": 1121
},
{
"epoch": 0.07,
"grad_norm": 0.9445773363113403,
"learning_rate": 9.95586334618917e-06,
"loss": 0.6679,
"step": 1122
},
{
"epoch": 0.07,
"grad_norm": 0.9021993279457092,
"learning_rate": 9.95572721838368e-06,
"loss": 0.6233,
"step": 1123
},
{
"epoch": 0.07,
"grad_norm": 0.9834071397781372,
"learning_rate": 9.955590881909753e-06,
"loss": 0.6273,
"step": 1124
},
{
"epoch": 0.07,
"grad_norm": 0.9453941583633423,
"learning_rate": 9.955454336773136e-06,
"loss": 0.6062,
"step": 1125
},
{
"epoch": 0.07,
"grad_norm": 0.8219738602638245,
"learning_rate": 9.955317582979575e-06,
"loss": 0.5888,
"step": 1126
},
{
"epoch": 0.07,
"grad_norm": 0.9637846350669861,
"learning_rate": 9.95518062053483e-06,
"loss": 0.6445,
"step": 1127
},
{
"epoch": 0.07,
"grad_norm": 0.9680708050727844,
"learning_rate": 9.955043449444665e-06,
"loss": 0.6871,
"step": 1128
},
{
"epoch": 0.07,
"grad_norm": 0.8683537840843201,
"learning_rate": 9.95490606971486e-06,
"loss": 0.6406,
"step": 1129
},
{
"epoch": 0.07,
"grad_norm": 0.896604061126709,
"learning_rate": 9.954768481351196e-06,
"loss": 0.6409,
"step": 1130
},
{
"epoch": 0.07,
"grad_norm": 0.9782860279083252,
"learning_rate": 9.954630684359468e-06,
"loss": 0.6409,
"step": 1131
},
{
"epoch": 0.07,
"grad_norm": 1.0023294687271118,
"learning_rate": 9.954492678745477e-06,
"loss": 0.6668,
"step": 1132
},
{
"epoch": 0.07,
"grad_norm": 0.9098303914070129,
"learning_rate": 9.954354464515035e-06,
"loss": 0.6047,
"step": 1133
},
{
"epoch": 0.07,
"grad_norm": 0.9375096559524536,
"learning_rate": 9.95421604167396e-06,
"loss": 0.605,
"step": 1134
},
{
"epoch": 0.07,
"grad_norm": 1.070643663406372,
"learning_rate": 9.954077410228084e-06,
"loss": 0.7336,
"step": 1135
},
{
"epoch": 0.07,
"grad_norm": 0.941253125667572,
"learning_rate": 9.95393857018324e-06,
"loss": 0.6458,
"step": 1136
},
{
"epoch": 0.07,
"grad_norm": 0.9726730585098267,
"learning_rate": 9.95379952154528e-06,
"loss": 0.6416,
"step": 1137
},
{
"epoch": 0.07,
"grad_norm": 1.0929338932037354,
"learning_rate": 9.953660264320053e-06,
"loss": 0.7103,
"step": 1138
},
{
"epoch": 0.07,
"grad_norm": 0.98173588514328,
"learning_rate": 9.953520798513425e-06,
"loss": 0.7664,
"step": 1139
},
{
"epoch": 0.07,
"grad_norm": 0.9463081955909729,
"learning_rate": 9.953381124131269e-06,
"loss": 0.6432,
"step": 1140
},
{
"epoch": 0.07,
"grad_norm": 0.967021644115448,
"learning_rate": 9.953241241179462e-06,
"loss": 0.6412,
"step": 1141
},
{
"epoch": 0.07,
"grad_norm": 0.9214240312576294,
"learning_rate": 9.953101149663902e-06,
"loss": 0.6414,
"step": 1142
},
{
"epoch": 0.07,
"grad_norm": 1.2380410432815552,
"learning_rate": 9.95296084959048e-06,
"loss": 0.5983,
"step": 1143
},
{
"epoch": 0.07,
"grad_norm": 0.9872441291809082,
"learning_rate": 9.952820340965109e-06,
"loss": 0.653,
"step": 1144
},
{
"epoch": 0.07,
"grad_norm": 0.8805240392684937,
"learning_rate": 9.952679623793702e-06,
"loss": 0.6532,
"step": 1145
},
{
"epoch": 0.07,
"grad_norm": 0.9314813017845154,
"learning_rate": 9.952538698082185e-06,
"loss": 0.6649,
"step": 1146
},
{
"epoch": 0.07,
"grad_norm": 0.9278802871704102,
"learning_rate": 9.95239756383649e-06,
"loss": 0.6832,
"step": 1147
},
{
"epoch": 0.07,
"grad_norm": 0.9457370042800903,
"learning_rate": 9.952256221062566e-06,
"loss": 0.5822,
"step": 1148
},
{
"epoch": 0.07,
"grad_norm": 1.0056676864624023,
"learning_rate": 9.952114669766357e-06,
"loss": 0.6653,
"step": 1149
},
{
"epoch": 0.07,
"grad_norm": 0.877746045589447,
"learning_rate": 9.951972909953828e-06,
"loss": 0.5947,
"step": 1150
},
{
"epoch": 0.07,
"grad_norm": 0.949008047580719,
"learning_rate": 9.951830941630946e-06,
"loss": 0.6857,
"step": 1151
},
{
"epoch": 0.07,
"grad_norm": 1.0061525106430054,
"learning_rate": 9.951688764803689e-06,
"loss": 0.7064,
"step": 1152
},
{
"epoch": 0.07,
"grad_norm": 1.1196439266204834,
"learning_rate": 9.951546379478044e-06,
"loss": 0.7059,
"step": 1153
},
{
"epoch": 0.07,
"grad_norm": 1.003239393234253,
"learning_rate": 9.951403785660005e-06,
"loss": 0.7148,
"step": 1154
},
{
"epoch": 0.07,
"grad_norm": 0.9646631479263306,
"learning_rate": 9.95126098335558e-06,
"loss": 0.6991,
"step": 1155
},
{
"epoch": 0.07,
"grad_norm": 0.9327188730239868,
"learning_rate": 9.951117972570776e-06,
"loss": 0.6421,
"step": 1156
},
{
"epoch": 0.07,
"grad_norm": 0.9369673132896423,
"learning_rate": 9.95097475331162e-06,
"loss": 0.7018,
"step": 1157
},
{
"epoch": 0.07,
"grad_norm": 0.9307648539543152,
"learning_rate": 9.950831325584138e-06,
"loss": 0.6523,
"step": 1158
},
{
"epoch": 0.07,
"grad_norm": 0.9541182518005371,
"learning_rate": 9.950687689394373e-06,
"loss": 0.6665,
"step": 1159
},
{
"epoch": 0.07,
"grad_norm": 0.9604858756065369,
"learning_rate": 9.950543844748372e-06,
"loss": 0.6852,
"step": 1160
},
{
"epoch": 0.07,
"grad_norm": 0.9503093957901001,
"learning_rate": 9.950399791652193e-06,
"loss": 0.6652,
"step": 1161
},
{
"epoch": 0.07,
"grad_norm": 1.0395541191101074,
"learning_rate": 9.950255530111896e-06,
"loss": 0.7136,
"step": 1162
},
{
"epoch": 0.07,
"grad_norm": 0.8791881799697876,
"learning_rate": 9.950111060133562e-06,
"loss": 0.6324,
"step": 1163
},
{
"epoch": 0.07,
"grad_norm": 0.8195285201072693,
"learning_rate": 9.94996638172327e-06,
"loss": 0.6415,
"step": 1164
},
{
"epoch": 0.07,
"grad_norm": 1.003089427947998,
"learning_rate": 9.949821494887116e-06,
"loss": 0.6959,
"step": 1165
},
{
"epoch": 0.07,
"grad_norm": 0.9248343706130981,
"learning_rate": 9.949676399631197e-06,
"loss": 0.6429,
"step": 1166
},
{
"epoch": 0.07,
"grad_norm": 0.9678802490234375,
"learning_rate": 9.949531095961621e-06,
"loss": 0.6973,
"step": 1167
},
{
"epoch": 0.07,
"grad_norm": 0.968346118927002,
"learning_rate": 9.94938558388451e-06,
"loss": 0.6681,
"step": 1168
},
{
"epoch": 0.07,
"grad_norm": 0.9063887000083923,
"learning_rate": 9.94923986340599e-06,
"loss": 0.6448,
"step": 1169
},
{
"epoch": 0.07,
"grad_norm": 0.8939194083213806,
"learning_rate": 9.949093934532196e-06,
"loss": 0.6124,
"step": 1170
},
{
"epoch": 0.07,
"grad_norm": 0.9329174757003784,
"learning_rate": 9.948947797269275e-06,
"loss": 0.6408,
"step": 1171
},
{
"epoch": 0.07,
"grad_norm": 0.8747833967208862,
"learning_rate": 9.948801451623376e-06,
"loss": 0.6501,
"step": 1172
},
{
"epoch": 0.07,
"grad_norm": 0.9457252621650696,
"learning_rate": 9.948654897600664e-06,
"loss": 0.6845,
"step": 1173
},
{
"epoch": 0.07,
"grad_norm": 0.9753620028495789,
"learning_rate": 9.94850813520731e-06,
"loss": 0.6558,
"step": 1174
},
{
"epoch": 0.07,
"grad_norm": 0.9322195053100586,
"learning_rate": 9.948361164449493e-06,
"loss": 0.6277,
"step": 1175
},
{
"epoch": 0.07,
"grad_norm": 0.9404197335243225,
"learning_rate": 9.948213985333403e-06,
"loss": 0.6757,
"step": 1176
},
{
"epoch": 0.07,
"grad_norm": 1.0010050535202026,
"learning_rate": 9.948066597865234e-06,
"loss": 0.6567,
"step": 1177
},
{
"epoch": 0.07,
"grad_norm": 0.9383962750434875,
"learning_rate": 9.947919002051194e-06,
"loss": 0.6576,
"step": 1178
},
{
"epoch": 0.07,
"grad_norm": 0.9984327554702759,
"learning_rate": 9.947771197897495e-06,
"loss": 0.6455,
"step": 1179
},
{
"epoch": 0.07,
"grad_norm": 0.968433678150177,
"learning_rate": 9.947623185410366e-06,
"loss": 0.6596,
"step": 1180
},
{
"epoch": 0.07,
"grad_norm": 0.9923335313796997,
"learning_rate": 9.947474964596036e-06,
"loss": 0.7174,
"step": 1181
},
{
"epoch": 0.07,
"grad_norm": 0.9914311170578003,
"learning_rate": 9.947326535460744e-06,
"loss": 0.6712,
"step": 1182
},
{
"epoch": 0.07,
"grad_norm": 0.9265934824943542,
"learning_rate": 9.947177898010745e-06,
"loss": 0.6429,
"step": 1183
},
{
"epoch": 0.08,
"grad_norm": 0.9598346948623657,
"learning_rate": 9.947029052252293e-06,
"loss": 0.633,
"step": 1184
},
{
"epoch": 0.08,
"grad_norm": 0.907012939453125,
"learning_rate": 9.946879998191656e-06,
"loss": 0.6516,
"step": 1185
},
{
"epoch": 0.08,
"grad_norm": 0.9562612771987915,
"learning_rate": 9.946730735835112e-06,
"loss": 0.7378,
"step": 1186
},
{
"epoch": 0.08,
"grad_norm": 0.9579821228981018,
"learning_rate": 9.946581265188947e-06,
"loss": 0.6426,
"step": 1187
},
{
"epoch": 0.08,
"grad_norm": 0.9079206585884094,
"learning_rate": 9.946431586259451e-06,
"loss": 0.6513,
"step": 1188
},
{
"epoch": 0.08,
"grad_norm": 0.9543782472610474,
"learning_rate": 9.946281699052928e-06,
"loss": 0.6829,
"step": 1189
},
{
"epoch": 0.08,
"grad_norm": 0.9957901239395142,
"learning_rate": 9.946131603575691e-06,
"loss": 0.7518,
"step": 1190
},
{
"epoch": 0.08,
"grad_norm": 0.971076250076294,
"learning_rate": 9.945981299834058e-06,
"loss": 0.6697,
"step": 1191
},
{
"epoch": 0.08,
"grad_norm": 0.9861418008804321,
"learning_rate": 9.945830787834358e-06,
"loss": 0.6971,
"step": 1192
},
{
"epoch": 0.08,
"grad_norm": 0.9396786093711853,
"learning_rate": 9.945680067582928e-06,
"loss": 0.6282,
"step": 1193
},
{
"epoch": 0.08,
"grad_norm": 0.8945440053939819,
"learning_rate": 9.945529139086116e-06,
"loss": 0.6315,
"step": 1194
},
{
"epoch": 0.08,
"grad_norm": 1.0057705640792847,
"learning_rate": 9.945378002350277e-06,
"loss": 0.6682,
"step": 1195
},
{
"epoch": 0.08,
"grad_norm": 1.003580093383789,
"learning_rate": 9.945226657381773e-06,
"loss": 0.6859,
"step": 1196
},
{
"epoch": 0.08,
"grad_norm": 0.8602601289749146,
"learning_rate": 9.945075104186978e-06,
"loss": 0.6245,
"step": 1197
},
{
"epoch": 0.08,
"grad_norm": 0.8855172395706177,
"learning_rate": 9.944923342772272e-06,
"loss": 0.656,
"step": 1198
},
{
"epoch": 0.08,
"grad_norm": 0.916766881942749,
"learning_rate": 9.944771373144047e-06,
"loss": 0.635,
"step": 1199
},
{
"epoch": 0.08,
"grad_norm": 0.8637309670448303,
"learning_rate": 9.944619195308701e-06,
"loss": 0.6163,
"step": 1200
},
{
"epoch": 0.08,
"grad_norm": 0.958526611328125,
"learning_rate": 9.944466809272642e-06,
"loss": 0.6275,
"step": 1201
},
{
"epoch": 0.08,
"grad_norm": 0.900386393070221,
"learning_rate": 9.944314215042286e-06,
"loss": 0.6159,
"step": 1202
},
{
"epoch": 0.08,
"grad_norm": 0.9339030385017395,
"learning_rate": 9.944161412624059e-06,
"loss": 0.6542,
"step": 1203
},
{
"epoch": 0.08,
"grad_norm": 0.9049733281135559,
"learning_rate": 9.944008402024395e-06,
"loss": 0.688,
"step": 1204
},
{
"epoch": 0.08,
"grad_norm": 1.030328392982483,
"learning_rate": 9.943855183249734e-06,
"loss": 0.6951,
"step": 1205
},
{
"epoch": 0.08,
"grad_norm": 0.9944655299186707,
"learning_rate": 9.94370175630653e-06,
"loss": 0.7017,
"step": 1206
},
{
"epoch": 0.08,
"grad_norm": 0.9828429222106934,
"learning_rate": 9.943548121201243e-06,
"loss": 0.6717,
"step": 1207
},
{
"epoch": 0.08,
"grad_norm": 0.9837692975997925,
"learning_rate": 9.943394277940344e-06,
"loss": 0.6156,
"step": 1208
},
{
"epoch": 0.08,
"grad_norm": 1.0148766040802002,
"learning_rate": 9.943240226530306e-06,
"loss": 0.7246,
"step": 1209
},
{
"epoch": 0.08,
"grad_norm": 1.0045223236083984,
"learning_rate": 9.94308596697762e-06,
"loss": 0.6648,
"step": 1210
},
{
"epoch": 0.08,
"grad_norm": 0.9967672824859619,
"learning_rate": 9.942931499288779e-06,
"loss": 0.6908,
"step": 1211
},
{
"epoch": 0.08,
"grad_norm": 0.931037187576294,
"learning_rate": 9.942776823470288e-06,
"loss": 0.6155,
"step": 1212
},
{
"epoch": 0.08,
"grad_norm": 0.9639803171157837,
"learning_rate": 9.94262193952866e-06,
"loss": 0.6644,
"step": 1213
},
{
"epoch": 0.08,
"grad_norm": 0.9461570978164673,
"learning_rate": 9.942466847470415e-06,
"loss": 0.6588,
"step": 1214
},
{
"epoch": 0.08,
"grad_norm": 0.8313033580780029,
"learning_rate": 9.942311547302087e-06,
"loss": 0.5843,
"step": 1215
},
{
"epoch": 0.08,
"grad_norm": 0.9389255046844482,
"learning_rate": 9.94215603903021e-06,
"loss": 0.65,
"step": 1216
},
{
"epoch": 0.08,
"grad_norm": 0.9553146362304688,
"learning_rate": 9.942000322661339e-06,
"loss": 0.6667,
"step": 1217
},
{
"epoch": 0.08,
"grad_norm": 0.9988784193992615,
"learning_rate": 9.941844398202022e-06,
"loss": 0.6178,
"step": 1218
},
{
"epoch": 0.08,
"grad_norm": 0.9160767197608948,
"learning_rate": 9.941688265658832e-06,
"loss": 0.6256,
"step": 1219
},
{
"epoch": 0.08,
"grad_norm": 0.9464467167854309,
"learning_rate": 9.941531925038337e-06,
"loss": 0.6528,
"step": 1220
},
{
"epoch": 0.08,
"grad_norm": 0.9644220471382141,
"learning_rate": 9.941375376347124e-06,
"loss": 0.6812,
"step": 1221
},
{
"epoch": 0.08,
"grad_norm": 0.9486405849456787,
"learning_rate": 9.941218619591783e-06,
"loss": 0.6619,
"step": 1222
},
{
"epoch": 0.08,
"grad_norm": 1.004610538482666,
"learning_rate": 9.941061654778917e-06,
"loss": 0.6183,
"step": 1223
},
{
"epoch": 0.08,
"grad_norm": 0.991166889667511,
"learning_rate": 9.940904481915132e-06,
"loss": 0.6616,
"step": 1224
},
{
"epoch": 0.08,
"grad_norm": 0.913848876953125,
"learning_rate": 9.940747101007049e-06,
"loss": 0.6455,
"step": 1225
},
{
"epoch": 0.08,
"grad_norm": 0.8956865072250366,
"learning_rate": 9.940589512061292e-06,
"loss": 0.6446,
"step": 1226
},
{
"epoch": 0.08,
"grad_norm": 0.9563295841217041,
"learning_rate": 9.940431715084498e-06,
"loss": 0.6442,
"step": 1227
},
{
"epoch": 0.08,
"grad_norm": 0.8902249336242676,
"learning_rate": 9.94027371008331e-06,
"loss": 0.63,
"step": 1228
},
{
"epoch": 0.08,
"grad_norm": 0.964384138584137,
"learning_rate": 9.940115497064383e-06,
"loss": 0.679,
"step": 1229
},
{
"epoch": 0.08,
"grad_norm": 1.0036017894744873,
"learning_rate": 9.939957076034379e-06,
"loss": 0.6915,
"step": 1230
},
{
"epoch": 0.08,
"grad_norm": 1.0034871101379395,
"learning_rate": 9.939798446999965e-06,
"loss": 0.6486,
"step": 1231
},
{
"epoch": 0.08,
"grad_norm": 0.8999437689781189,
"learning_rate": 9.939639609967825e-06,
"loss": 0.6805,
"step": 1232
},
{
"epoch": 0.08,
"grad_norm": 0.9058635830879211,
"learning_rate": 9.939480564944642e-06,
"loss": 0.6467,
"step": 1233
},
{
"epoch": 0.08,
"grad_norm": 0.9270319938659668,
"learning_rate": 9.939321311937117e-06,
"loss": 0.636,
"step": 1234
},
{
"epoch": 0.08,
"grad_norm": 0.9657304883003235,
"learning_rate": 9.939161850951955e-06,
"loss": 0.6824,
"step": 1235
},
{
"epoch": 0.08,
"grad_norm": 0.9326258301734924,
"learning_rate": 9.939002181995869e-06,
"loss": 0.6247,
"step": 1236
},
{
"epoch": 0.08,
"grad_norm": 0.8920637965202332,
"learning_rate": 9.938842305075583e-06,
"loss": 0.6373,
"step": 1237
},
{
"epoch": 0.08,
"grad_norm": 0.9409562349319458,
"learning_rate": 9.938682220197828e-06,
"loss": 0.6077,
"step": 1238
},
{
"epoch": 0.08,
"grad_norm": 0.8801417946815491,
"learning_rate": 9.938521927369344e-06,
"loss": 0.6141,
"step": 1239
},
{
"epoch": 0.08,
"grad_norm": 0.9063442945480347,
"learning_rate": 9.938361426596883e-06,
"loss": 0.6628,
"step": 1240
},
{
"epoch": 0.08,
"grad_norm": 0.9930490851402283,
"learning_rate": 9.938200717887202e-06,
"loss": 0.6316,
"step": 1241
},
{
"epoch": 0.08,
"grad_norm": 0.969541609287262,
"learning_rate": 9.938039801247066e-06,
"loss": 0.6512,
"step": 1242
},
{
"epoch": 0.08,
"grad_norm": 1.0176066160202026,
"learning_rate": 9.937878676683254e-06,
"loss": 0.706,
"step": 1243
},
{
"epoch": 0.08,
"grad_norm": 0.9394564032554626,
"learning_rate": 9.937717344202548e-06,
"loss": 0.5894,
"step": 1244
},
{
"epoch": 0.08,
"grad_norm": 0.9627434611320496,
"learning_rate": 9.93755580381174e-06,
"loss": 0.7024,
"step": 1245
},
{
"epoch": 0.08,
"grad_norm": 0.8768373131752014,
"learning_rate": 9.937394055517635e-06,
"loss": 0.6532,
"step": 1246
},
{
"epoch": 0.08,
"grad_norm": 0.941260039806366,
"learning_rate": 9.937232099327044e-06,
"loss": 0.5845,
"step": 1247
},
{
"epoch": 0.08,
"grad_norm": 0.9536455869674683,
"learning_rate": 9.937069935246782e-06,
"loss": 0.6578,
"step": 1248
},
{
"epoch": 0.08,
"grad_norm": 1.0150847434997559,
"learning_rate": 9.93690756328368e-06,
"loss": 0.6162,
"step": 1249
},
{
"epoch": 0.08,
"grad_norm": 0.8984355926513672,
"learning_rate": 9.936744983444576e-06,
"loss": 0.6779,
"step": 1250
},
{
"epoch": 0.08,
"grad_norm": 0.9334084987640381,
"learning_rate": 9.936582195736314e-06,
"loss": 0.6434,
"step": 1251
},
{
"epoch": 0.08,
"grad_norm": 0.9719336628913879,
"learning_rate": 9.936419200165748e-06,
"loss": 0.608,
"step": 1252
},
{
"epoch": 0.08,
"grad_norm": 0.9293937087059021,
"learning_rate": 9.936255996739743e-06,
"loss": 0.6417,
"step": 1253
},
{
"epoch": 0.08,
"grad_norm": 0.9545564651489258,
"learning_rate": 9.93609258546517e-06,
"loss": 0.68,
"step": 1254
},
{
"epoch": 0.08,
"grad_norm": 0.9403777122497559,
"learning_rate": 9.93592896634891e-06,
"loss": 0.6945,
"step": 1255
},
{
"epoch": 0.08,
"grad_norm": 1.053970456123352,
"learning_rate": 9.93576513939785e-06,
"loss": 0.6557,
"step": 1256
},
{
"epoch": 0.08,
"grad_norm": 0.9495333433151245,
"learning_rate": 9.935601104618892e-06,
"loss": 0.7352,
"step": 1257
},
{
"epoch": 0.08,
"grad_norm": 0.9049481153488159,
"learning_rate": 9.93543686201894e-06,
"loss": 0.664,
"step": 1258
},
{
"epoch": 0.08,
"grad_norm": 0.96930330991745,
"learning_rate": 9.935272411604913e-06,
"loss": 0.6956,
"step": 1259
},
{
"epoch": 0.08,
"grad_norm": 0.8767880201339722,
"learning_rate": 9.935107753383733e-06,
"loss": 0.6015,
"step": 1260
},
{
"epoch": 0.08,
"grad_norm": 0.9508046507835388,
"learning_rate": 9.93494288736233e-06,
"loss": 0.6727,
"step": 1261
},
{
"epoch": 0.08,
"grad_norm": 0.9858577847480774,
"learning_rate": 9.934777813547653e-06,
"loss": 0.6565,
"step": 1262
},
{
"epoch": 0.08,
"grad_norm": 0.9373133778572083,
"learning_rate": 9.934612531946648e-06,
"loss": 0.6131,
"step": 1263
},
{
"epoch": 0.08,
"grad_norm": 0.9406293630599976,
"learning_rate": 9.934447042566275e-06,
"loss": 0.6567,
"step": 1264
},
{
"epoch": 0.08,
"grad_norm": 0.990612804889679,
"learning_rate": 9.934281345413504e-06,
"loss": 0.6956,
"step": 1265
},
{
"epoch": 0.08,
"grad_norm": 0.9480006694793701,
"learning_rate": 9.934115440495311e-06,
"loss": 0.6838,
"step": 1266
},
{
"epoch": 0.08,
"grad_norm": 0.9584172964096069,
"learning_rate": 9.93394932781868e-06,
"loss": 0.6654,
"step": 1267
},
{
"epoch": 0.08,
"grad_norm": 0.9526914954185486,
"learning_rate": 9.933783007390608e-06,
"loss": 0.6718,
"step": 1268
},
{
"epoch": 0.08,
"grad_norm": 0.9477076530456543,
"learning_rate": 9.933616479218095e-06,
"loss": 0.6368,
"step": 1269
},
{
"epoch": 0.08,
"grad_norm": 0.9243208765983582,
"learning_rate": 9.933449743308155e-06,
"loss": 0.6717,
"step": 1270
},
{
"epoch": 0.08,
"grad_norm": 0.9876498579978943,
"learning_rate": 9.93328279966781e-06,
"loss": 0.6841,
"step": 1271
},
{
"epoch": 0.08,
"grad_norm": 0.9659183025360107,
"learning_rate": 9.933115648304087e-06,
"loss": 0.6039,
"step": 1272
},
{
"epoch": 0.08,
"grad_norm": 0.8511553406715393,
"learning_rate": 9.932948289224025e-06,
"loss": 0.6388,
"step": 1273
},
{
"epoch": 0.08,
"grad_norm": 0.9214879274368286,
"learning_rate": 9.932780722434671e-06,
"loss": 0.6694,
"step": 1274
},
{
"epoch": 0.08,
"grad_norm": 0.8896194696426392,
"learning_rate": 9.932612947943084e-06,
"loss": 0.6285,
"step": 1275
},
{
"epoch": 0.08,
"grad_norm": 1.1229159832000732,
"learning_rate": 9.932444965756321e-06,
"loss": 0.6201,
"step": 1276
},
{
"epoch": 0.08,
"grad_norm": 0.9542286396026611,
"learning_rate": 9.93227677588146e-06,
"loss": 0.6795,
"step": 1277
},
{
"epoch": 0.08,
"grad_norm": 0.9124268293380737,
"learning_rate": 9.932108378325582e-06,
"loss": 0.6349,
"step": 1278
},
{
"epoch": 0.08,
"grad_norm": 0.8908014893531799,
"learning_rate": 9.931939773095779e-06,
"loss": 0.6293,
"step": 1279
},
{
"epoch": 0.08,
"grad_norm": 0.9545292258262634,
"learning_rate": 9.93177096019915e-06,
"loss": 0.6677,
"step": 1280
},
{
"epoch": 0.08,
"grad_norm": 0.9467611908912659,
"learning_rate": 9.9316019396428e-06,
"loss": 0.7047,
"step": 1281
},
{
"epoch": 0.08,
"grad_norm": 0.8944831490516663,
"learning_rate": 9.931432711433849e-06,
"loss": 0.6674,
"step": 1282
},
{
"epoch": 0.08,
"grad_norm": 0.9431514739990234,
"learning_rate": 9.93126327557942e-06,
"loss": 0.7063,
"step": 1283
},
{
"epoch": 0.08,
"grad_norm": 0.9257122874259949,
"learning_rate": 9.931093632086651e-06,
"loss": 0.6482,
"step": 1284
},
{
"epoch": 0.08,
"grad_norm": 0.916651725769043,
"learning_rate": 9.930923780962683e-06,
"loss": 0.6141,
"step": 1285
},
{
"epoch": 0.08,
"grad_norm": 0.9624120593070984,
"learning_rate": 9.930753722214668e-06,
"loss": 0.6743,
"step": 1286
},
{
"epoch": 0.08,
"grad_norm": 0.8737559914588928,
"learning_rate": 9.930583455849766e-06,
"loss": 0.5961,
"step": 1287
},
{
"epoch": 0.08,
"grad_norm": 1.0165629386901855,
"learning_rate": 9.930412981875148e-06,
"loss": 0.6855,
"step": 1288
},
{
"epoch": 0.08,
"grad_norm": 0.9609097242355347,
"learning_rate": 9.93024230029799e-06,
"loss": 0.7235,
"step": 1289
},
{
"epoch": 0.08,
"grad_norm": 0.9250974059104919,
"learning_rate": 9.93007141112548e-06,
"loss": 0.6568,
"step": 1290
},
{
"epoch": 0.08,
"grad_norm": 0.9859768748283386,
"learning_rate": 9.929900314364813e-06,
"loss": 0.6838,
"step": 1291
},
{
"epoch": 0.08,
"grad_norm": 0.9288014769554138,
"learning_rate": 9.929729010023195e-06,
"loss": 0.6676,
"step": 1292
},
{
"epoch": 0.08,
"grad_norm": 0.9711349010467529,
"learning_rate": 9.929557498107836e-06,
"loss": 0.6951,
"step": 1293
},
{
"epoch": 0.08,
"grad_norm": 0.9767155051231384,
"learning_rate": 9.929385778625959e-06,
"loss": 0.6707,
"step": 1294
},
{
"epoch": 0.08,
"grad_norm": 0.9217318296432495,
"learning_rate": 9.929213851584798e-06,
"loss": 0.6735,
"step": 1295
},
{
"epoch": 0.08,
"grad_norm": 0.9826382398605347,
"learning_rate": 9.929041716991587e-06,
"loss": 0.6452,
"step": 1296
},
{
"epoch": 0.08,
"grad_norm": 0.9821561574935913,
"learning_rate": 9.928869374853576e-06,
"loss": 0.6308,
"step": 1297
},
{
"epoch": 0.08,
"grad_norm": 0.9427945613861084,
"learning_rate": 9.928696825178021e-06,
"loss": 0.6526,
"step": 1298
},
{
"epoch": 0.08,
"grad_norm": 0.9248101711273193,
"learning_rate": 9.92852406797219e-06,
"loss": 0.6777,
"step": 1299
},
{
"epoch": 0.08,
"grad_norm": 1.0439354181289673,
"learning_rate": 9.928351103243356e-06,
"loss": 0.693,
"step": 1300
},
{
"epoch": 0.08,
"grad_norm": 0.8999105095863342,
"learning_rate": 9.928177930998801e-06,
"loss": 0.6325,
"step": 1301
},
{
"epoch": 0.08,
"grad_norm": 0.9729776382446289,
"learning_rate": 9.928004551245818e-06,
"loss": 0.6127,
"step": 1302
},
{
"epoch": 0.08,
"grad_norm": 0.9380604028701782,
"learning_rate": 9.927830963991704e-06,
"loss": 0.6486,
"step": 1303
},
{
"epoch": 0.08,
"grad_norm": 0.9806588888168335,
"learning_rate": 9.927657169243773e-06,
"loss": 0.7019,
"step": 1304
},
{
"epoch": 0.08,
"grad_norm": 0.9600833654403687,
"learning_rate": 9.92748316700934e-06,
"loss": 0.7007,
"step": 1305
},
{
"epoch": 0.08,
"grad_norm": 0.8898829817771912,
"learning_rate": 9.927308957295733e-06,
"loss": 0.6332,
"step": 1306
},
{
"epoch": 0.08,
"grad_norm": 0.9268112182617188,
"learning_rate": 9.927134540110286e-06,
"loss": 0.6576,
"step": 1307
},
{
"epoch": 0.08,
"grad_norm": 0.8421509861946106,
"learning_rate": 9.926959915460344e-06,
"loss": 0.6011,
"step": 1308
},
{
"epoch": 0.08,
"grad_norm": 0.8830382823944092,
"learning_rate": 9.926785083353258e-06,
"loss": 0.5837,
"step": 1309
},
{
"epoch": 0.08,
"grad_norm": 0.9646912217140198,
"learning_rate": 9.926610043796394e-06,
"loss": 0.6313,
"step": 1310
},
{
"epoch": 0.08,
"grad_norm": 0.9010607004165649,
"learning_rate": 9.926434796797117e-06,
"loss": 0.622,
"step": 1311
},
{
"epoch": 0.08,
"grad_norm": 0.976517379283905,
"learning_rate": 9.92625934236281e-06,
"loss": 0.6945,
"step": 1312
},
{
"epoch": 0.08,
"grad_norm": 1.0138803720474243,
"learning_rate": 9.92608368050086e-06,
"loss": 0.6774,
"step": 1313
},
{
"epoch": 0.08,
"grad_norm": 0.9496868848800659,
"learning_rate": 9.925907811218661e-06,
"loss": 0.6395,
"step": 1314
},
{
"epoch": 0.08,
"grad_norm": 0.9517762064933777,
"learning_rate": 9.925731734523621e-06,
"loss": 0.6851,
"step": 1315
},
{
"epoch": 0.08,
"grad_norm": 0.8854117393493652,
"learning_rate": 9.925555450423153e-06,
"loss": 0.6039,
"step": 1316
},
{
"epoch": 0.08,
"grad_norm": 0.975888729095459,
"learning_rate": 9.92537895892468e-06,
"loss": 0.6392,
"step": 1317
},
{
"epoch": 0.08,
"grad_norm": 0.9728052616119385,
"learning_rate": 9.925202260035632e-06,
"loss": 0.6934,
"step": 1318
},
{
"epoch": 0.08,
"grad_norm": 0.8676881194114685,
"learning_rate": 9.925025353763452e-06,
"loss": 0.608,
"step": 1319
},
{
"epoch": 0.08,
"grad_norm": 0.9743890762329102,
"learning_rate": 9.924848240115585e-06,
"loss": 0.6726,
"step": 1320
},
{
"epoch": 0.08,
"grad_norm": 0.8968915939331055,
"learning_rate": 9.924670919099493e-06,
"loss": 0.6049,
"step": 1321
},
{
"epoch": 0.08,
"grad_norm": 0.8923019766807556,
"learning_rate": 9.92449339072264e-06,
"loss": 0.6528,
"step": 1322
},
{
"epoch": 0.08,
"grad_norm": 0.9763730764389038,
"learning_rate": 9.924315654992501e-06,
"loss": 0.6355,
"step": 1323
},
{
"epoch": 0.08,
"grad_norm": 0.9337408542633057,
"learning_rate": 9.924137711916559e-06,
"loss": 0.6283,
"step": 1324
},
{
"epoch": 0.08,
"grad_norm": 0.9374693036079407,
"learning_rate": 9.92395956150231e-06,
"loss": 0.6538,
"step": 1325
},
{
"epoch": 0.08,
"grad_norm": 1.0116811990737915,
"learning_rate": 9.923781203757253e-06,
"loss": 0.6373,
"step": 1326
},
{
"epoch": 0.08,
"grad_norm": 0.9618993997573853,
"learning_rate": 9.923602638688897e-06,
"loss": 0.6516,
"step": 1327
},
{
"epoch": 0.08,
"grad_norm": 0.9200368523597717,
"learning_rate": 9.923423866304761e-06,
"loss": 0.6495,
"step": 1328
},
{
"epoch": 0.08,
"grad_norm": 0.9342839121818542,
"learning_rate": 9.923244886612375e-06,
"loss": 0.6653,
"step": 1329
},
{
"epoch": 0.08,
"grad_norm": 0.9107709527015686,
"learning_rate": 9.923065699619273e-06,
"loss": 0.6235,
"step": 1330
},
{
"epoch": 0.08,
"grad_norm": 0.9179040193557739,
"learning_rate": 9.922886305333e-06,
"loss": 0.6449,
"step": 1331
},
{
"epoch": 0.08,
"grad_norm": 0.9737808108329773,
"learning_rate": 9.922706703761111e-06,
"loss": 0.6355,
"step": 1332
},
{
"epoch": 0.08,
"grad_norm": 0.9478714466094971,
"learning_rate": 9.922526894911166e-06,
"loss": 0.6301,
"step": 1333
},
{
"epoch": 0.08,
"grad_norm": 0.9034336805343628,
"learning_rate": 9.922346878790739e-06,
"loss": 0.6711,
"step": 1334
},
{
"epoch": 0.08,
"grad_norm": 0.9243572354316711,
"learning_rate": 9.922166655407408e-06,
"loss": 0.6703,
"step": 1335
},
{
"epoch": 0.08,
"grad_norm": 0.9634230136871338,
"learning_rate": 9.921986224768762e-06,
"loss": 0.629,
"step": 1336
},
{
"epoch": 0.08,
"grad_norm": 0.975191593170166,
"learning_rate": 9.9218055868824e-06,
"loss": 0.6973,
"step": 1337
},
{
"epoch": 0.08,
"grad_norm": 0.9010855555534363,
"learning_rate": 9.921624741755924e-06,
"loss": 0.6279,
"step": 1338
},
{
"epoch": 0.08,
"grad_norm": 0.9838567972183228,
"learning_rate": 9.921443689396952e-06,
"loss": 0.6527,
"step": 1339
},
{
"epoch": 0.08,
"grad_norm": 0.9376393556594849,
"learning_rate": 9.921262429813107e-06,
"loss": 0.5999,
"step": 1340
},
{
"epoch": 0.08,
"grad_norm": 1.01847505569458,
"learning_rate": 9.921080963012021e-06,
"loss": 0.637,
"step": 1341
},
{
"epoch": 0.09,
"grad_norm": 0.9339948892593384,
"learning_rate": 9.920899289001335e-06,
"loss": 0.7109,
"step": 1342
},
{
"epoch": 0.09,
"grad_norm": 0.8605727553367615,
"learning_rate": 9.9207174077887e-06,
"loss": 0.6148,
"step": 1343
},
{
"epoch": 0.09,
"grad_norm": 0.9776952862739563,
"learning_rate": 9.92053531938177e-06,
"loss": 0.6101,
"step": 1344
},
{
"epoch": 0.09,
"grad_norm": 0.9384501576423645,
"learning_rate": 9.920353023788216e-06,
"loss": 0.6861,
"step": 1345
},
{
"epoch": 0.09,
"grad_norm": 1.0112212896347046,
"learning_rate": 9.920170521015714e-06,
"loss": 0.6201,
"step": 1346
},
{
"epoch": 0.09,
"grad_norm": 0.993434488773346,
"learning_rate": 9.919987811071946e-06,
"loss": 0.7198,
"step": 1347
},
{
"epoch": 0.09,
"grad_norm": 1.0135927200317383,
"learning_rate": 9.919804893964607e-06,
"loss": 0.7262,
"step": 1348
},
{
"epoch": 0.09,
"grad_norm": 0.9492275714874268,
"learning_rate": 9.9196217697014e-06,
"loss": 0.6953,
"step": 1349
},
{
"epoch": 0.09,
"grad_norm": 0.9331912398338318,
"learning_rate": 9.919438438290032e-06,
"loss": 0.6936,
"step": 1350
},
{
"epoch": 0.09,
"grad_norm": 0.9214736223220825,
"learning_rate": 9.919254899738227e-06,
"loss": 0.6333,
"step": 1351
},
{
"epoch": 0.09,
"grad_norm": 0.921393096446991,
"learning_rate": 9.91907115405371e-06,
"loss": 0.6357,
"step": 1352
},
{
"epoch": 0.09,
"grad_norm": 0.867919385433197,
"learning_rate": 9.918887201244219e-06,
"loss": 0.6465,
"step": 1353
},
{
"epoch": 0.09,
"grad_norm": 0.9736660718917847,
"learning_rate": 9.918703041317498e-06,
"loss": 0.6553,
"step": 1354
},
{
"epoch": 0.09,
"grad_norm": 0.9158203601837158,
"learning_rate": 9.918518674281305e-06,
"loss": 0.6443,
"step": 1355
},
{
"epoch": 0.09,
"grad_norm": 0.9227981567382812,
"learning_rate": 9.9183341001434e-06,
"loss": 0.6795,
"step": 1356
},
{
"epoch": 0.09,
"grad_norm": 0.9615574479103088,
"learning_rate": 9.918149318911557e-06,
"loss": 0.6565,
"step": 1357
},
{
"epoch": 0.09,
"grad_norm": 0.9969097375869751,
"learning_rate": 9.917964330593553e-06,
"loss": 0.653,
"step": 1358
},
{
"epoch": 0.09,
"grad_norm": 0.9658487439155579,
"learning_rate": 9.917779135197181e-06,
"loss": 0.6578,
"step": 1359
},
{
"epoch": 0.09,
"grad_norm": 0.9004530906677246,
"learning_rate": 9.917593732730236e-06,
"loss": 0.6623,
"step": 1360
},
{
"epoch": 0.09,
"grad_norm": 1.01588773727417,
"learning_rate": 9.917408123200527e-06,
"loss": 0.6957,
"step": 1361
},
{
"epoch": 0.09,
"grad_norm": 0.9513044357299805,
"learning_rate": 9.917222306615868e-06,
"loss": 0.5935,
"step": 1362
},
{
"epoch": 0.09,
"grad_norm": 0.9240687489509583,
"learning_rate": 9.917036282984084e-06,
"loss": 0.6705,
"step": 1363
},
{
"epoch": 0.09,
"grad_norm": 0.9190331697463989,
"learning_rate": 9.916850052313007e-06,
"loss": 0.6988,
"step": 1364
},
{
"epoch": 0.09,
"grad_norm": 0.9454796314239502,
"learning_rate": 9.916663614610478e-06,
"loss": 0.6126,
"step": 1365
},
{
"epoch": 0.09,
"grad_norm": 0.930030345916748,
"learning_rate": 9.916476969884348e-06,
"loss": 0.6659,
"step": 1366
},
{
"epoch": 0.09,
"grad_norm": 0.8999435901641846,
"learning_rate": 9.916290118142478e-06,
"loss": 0.6424,
"step": 1367
},
{
"epoch": 0.09,
"grad_norm": 0.9197795987129211,
"learning_rate": 9.91610305939273e-06,
"loss": 0.6463,
"step": 1368
},
{
"epoch": 0.09,
"grad_norm": 0.8855273127555847,
"learning_rate": 9.915915793642987e-06,
"loss": 0.6438,
"step": 1369
},
{
"epoch": 0.09,
"grad_norm": 0.992560863494873,
"learning_rate": 9.91572832090113e-06,
"loss": 0.6389,
"step": 1370
},
{
"epoch": 0.09,
"grad_norm": 0.9714159965515137,
"learning_rate": 9.915540641175055e-06,
"loss": 0.6747,
"step": 1371
},
{
"epoch": 0.09,
"grad_norm": 0.9169105291366577,
"learning_rate": 9.915352754472662e-06,
"loss": 0.6518,
"step": 1372
},
{
"epoch": 0.09,
"grad_norm": 0.9008778929710388,
"learning_rate": 9.915164660801865e-06,
"loss": 0.6317,
"step": 1373
},
{
"epoch": 0.09,
"grad_norm": 0.8850517272949219,
"learning_rate": 9.914976360170583e-06,
"loss": 0.6009,
"step": 1374
},
{
"epoch": 0.09,
"grad_norm": 0.9355595707893372,
"learning_rate": 9.914787852586744e-06,
"loss": 0.6217,
"step": 1375
},
{
"epoch": 0.09,
"grad_norm": 0.9078220129013062,
"learning_rate": 9.914599138058285e-06,
"loss": 0.6433,
"step": 1376
},
{
"epoch": 0.09,
"grad_norm": 0.9317836761474609,
"learning_rate": 9.914410216593154e-06,
"loss": 0.6114,
"step": 1377
},
{
"epoch": 0.09,
"grad_norm": 0.941766083240509,
"learning_rate": 9.914221088199304e-06,
"loss": 0.6318,
"step": 1378
},
{
"epoch": 0.09,
"grad_norm": 0.9302542805671692,
"learning_rate": 9.9140317528847e-06,
"loss": 0.6385,
"step": 1379
},
{
"epoch": 0.09,
"grad_norm": 0.9480794072151184,
"learning_rate": 9.913842210657314e-06,
"loss": 0.6457,
"step": 1380
},
{
"epoch": 0.09,
"grad_norm": 0.9591321349143982,
"learning_rate": 9.913652461525126e-06,
"loss": 0.6889,
"step": 1381
},
{
"epoch": 0.09,
"grad_norm": 0.9544585347175598,
"learning_rate": 9.913462505496126e-06,
"loss": 0.6533,
"step": 1382
},
{
"epoch": 0.09,
"grad_norm": 0.9712944030761719,
"learning_rate": 9.913272342578312e-06,
"loss": 0.6232,
"step": 1383
},
{
"epoch": 0.09,
"grad_norm": 0.9564849138259888,
"learning_rate": 9.913081972779692e-06,
"loss": 0.6481,
"step": 1384
},
{
"epoch": 0.09,
"grad_norm": 0.9358051419258118,
"learning_rate": 9.912891396108281e-06,
"loss": 0.6599,
"step": 1385
},
{
"epoch": 0.09,
"grad_norm": 0.9481112360954285,
"learning_rate": 9.912700612572106e-06,
"loss": 0.6148,
"step": 1386
},
{
"epoch": 0.09,
"grad_norm": 0.9976912140846252,
"learning_rate": 9.912509622179197e-06,
"loss": 0.6802,
"step": 1387
},
{
"epoch": 0.09,
"grad_norm": 0.9644153118133545,
"learning_rate": 9.912318424937596e-06,
"loss": 0.674,
"step": 1388
},
{
"epoch": 0.09,
"grad_norm": 0.9347633719444275,
"learning_rate": 9.912127020855356e-06,
"loss": 0.6715,
"step": 1389
},
{
"epoch": 0.09,
"grad_norm": 0.9877498149871826,
"learning_rate": 9.911935409940536e-06,
"loss": 0.6818,
"step": 1390
},
{
"epoch": 0.09,
"grad_norm": 0.999534010887146,
"learning_rate": 9.911743592201203e-06,
"loss": 0.6524,
"step": 1391
},
{
"epoch": 0.09,
"grad_norm": 0.9707819223403931,
"learning_rate": 9.911551567645433e-06,
"loss": 0.6186,
"step": 1392
},
{
"epoch": 0.09,
"grad_norm": 0.9409770965576172,
"learning_rate": 9.911359336281312e-06,
"loss": 0.6754,
"step": 1393
},
{
"epoch": 0.09,
"grad_norm": 0.9744927883148193,
"learning_rate": 9.911166898116935e-06,
"loss": 0.6842,
"step": 1394
},
{
"epoch": 0.09,
"grad_norm": 0.8977870941162109,
"learning_rate": 9.910974253160405e-06,
"loss": 0.6315,
"step": 1395
},
{
"epoch": 0.09,
"grad_norm": 0.9655022025108337,
"learning_rate": 9.910781401419835e-06,
"loss": 0.6493,
"step": 1396
},
{
"epoch": 0.09,
"grad_norm": 0.9334004521369934,
"learning_rate": 9.910588342903342e-06,
"loss": 0.6679,
"step": 1397
},
{
"epoch": 0.09,
"grad_norm": 0.9170674085617065,
"learning_rate": 9.910395077619057e-06,
"loss": 0.652,
"step": 1398
},
{
"epoch": 0.09,
"grad_norm": 0.8829054832458496,
"learning_rate": 9.910201605575116e-06,
"loss": 0.5469,
"step": 1399
},
{
"epoch": 0.09,
"grad_norm": 0.9742071032524109,
"learning_rate": 9.910007926779669e-06,
"loss": 0.6423,
"step": 1400
},
{
"epoch": 0.09,
"grad_norm": 0.9407263398170471,
"learning_rate": 9.909814041240867e-06,
"loss": 0.7067,
"step": 1401
},
{
"epoch": 0.09,
"grad_norm": 0.9770819544792175,
"learning_rate": 9.909619948966875e-06,
"loss": 0.6211,
"step": 1402
},
{
"epoch": 0.09,
"grad_norm": 0.8862954378128052,
"learning_rate": 9.909425649965869e-06,
"loss": 0.6222,
"step": 1403
},
{
"epoch": 0.09,
"grad_norm": 0.8932839035987854,
"learning_rate": 9.909231144246026e-06,
"loss": 0.6406,
"step": 1404
},
{
"epoch": 0.09,
"grad_norm": 0.972990870475769,
"learning_rate": 9.909036431815538e-06,
"loss": 0.6454,
"step": 1405
},
{
"epoch": 0.09,
"grad_norm": 0.9712089896202087,
"learning_rate": 9.908841512682602e-06,
"loss": 0.636,
"step": 1406
},
{
"epoch": 0.09,
"grad_norm": 0.9432918429374695,
"learning_rate": 9.908646386855427e-06,
"loss": 0.639,
"step": 1407
},
{
"epoch": 0.09,
"grad_norm": 0.896690845489502,
"learning_rate": 9.90845105434223e-06,
"loss": 0.6778,
"step": 1408
},
{
"epoch": 0.09,
"grad_norm": 0.8984845280647278,
"learning_rate": 9.908255515151232e-06,
"loss": 0.6641,
"step": 1409
},
{
"epoch": 0.09,
"grad_norm": 0.9165180921554565,
"learning_rate": 9.90805976929067e-06,
"loss": 0.6136,
"step": 1410
},
{
"epoch": 0.09,
"grad_norm": 0.9825322031974792,
"learning_rate": 9.907863816768786e-06,
"loss": 0.68,
"step": 1411
},
{
"epoch": 0.09,
"grad_norm": 0.9669419527053833,
"learning_rate": 9.907667657593828e-06,
"loss": 0.6393,
"step": 1412
},
{
"epoch": 0.09,
"grad_norm": 0.9365913271903992,
"learning_rate": 9.907471291774058e-06,
"loss": 0.6369,
"step": 1413
},
{
"epoch": 0.09,
"grad_norm": 0.9577059745788574,
"learning_rate": 9.907274719317746e-06,
"loss": 0.6428,
"step": 1414
},
{
"epoch": 0.09,
"grad_norm": 0.9230369329452515,
"learning_rate": 9.907077940233162e-06,
"loss": 0.6102,
"step": 1415
},
{
"epoch": 0.09,
"grad_norm": 1.0820754766464233,
"learning_rate": 9.906880954528601e-06,
"loss": 0.7506,
"step": 1416
},
{
"epoch": 0.09,
"grad_norm": 0.98191237449646,
"learning_rate": 9.90668376221235e-06,
"loss": 0.6625,
"step": 1417
},
{
"epoch": 0.09,
"grad_norm": 1.2556596994400024,
"learning_rate": 9.906486363292718e-06,
"loss": 0.6521,
"step": 1418
},
{
"epoch": 0.09,
"grad_norm": 0.9278602004051208,
"learning_rate": 9.906288757778012e-06,
"loss": 0.6897,
"step": 1419
},
{
"epoch": 0.09,
"grad_norm": 0.932048499584198,
"learning_rate": 9.906090945676552e-06,
"loss": 0.6475,
"step": 1420
},
{
"epoch": 0.09,
"grad_norm": 0.9188955426216125,
"learning_rate": 9.905892926996672e-06,
"loss": 0.6607,
"step": 1421
},
{
"epoch": 0.09,
"grad_norm": 0.8783012628555298,
"learning_rate": 9.905694701746706e-06,
"loss": 0.67,
"step": 1422
},
{
"epoch": 0.09,
"grad_norm": 0.9844834208488464,
"learning_rate": 9.905496269935002e-06,
"loss": 0.6815,
"step": 1423
},
{
"epoch": 0.09,
"grad_norm": 0.8767088055610657,
"learning_rate": 9.905297631569915e-06,
"loss": 0.6505,
"step": 1424
},
{
"epoch": 0.09,
"grad_norm": 0.9569482207298279,
"learning_rate": 9.905098786659809e-06,
"loss": 0.6456,
"step": 1425
},
{
"epoch": 0.09,
"grad_norm": 0.991873562335968,
"learning_rate": 9.904899735213058e-06,
"loss": 0.6747,
"step": 1426
},
{
"epoch": 0.09,
"grad_norm": 0.8848119974136353,
"learning_rate": 9.90470047723804e-06,
"loss": 0.6224,
"step": 1427
},
{
"epoch": 0.09,
"grad_norm": 0.8410341143608093,
"learning_rate": 9.904501012743149e-06,
"loss": 0.5621,
"step": 1428
},
{
"epoch": 0.09,
"grad_norm": 0.951511561870575,
"learning_rate": 9.90430134173678e-06,
"loss": 0.651,
"step": 1429
},
{
"epoch": 0.09,
"grad_norm": 0.93990159034729,
"learning_rate": 9.904101464227342e-06,
"loss": 0.6245,
"step": 1430
},
{
"epoch": 0.09,
"grad_norm": 0.9799292087554932,
"learning_rate": 9.903901380223254e-06,
"loss": 0.6582,
"step": 1431
},
{
"epoch": 0.09,
"grad_norm": 0.9411140084266663,
"learning_rate": 9.903701089732937e-06,
"loss": 0.6366,
"step": 1432
},
{
"epoch": 0.09,
"grad_norm": 0.8953483700752258,
"learning_rate": 9.903500592764825e-06,
"loss": 0.5984,
"step": 1433
},
{
"epoch": 0.09,
"grad_norm": 0.9429217576980591,
"learning_rate": 9.903299889327362e-06,
"loss": 0.6379,
"step": 1434
},
{
"epoch": 0.09,
"grad_norm": 0.9783498644828796,
"learning_rate": 9.903098979428998e-06,
"loss": 0.6302,
"step": 1435
},
{
"epoch": 0.09,
"grad_norm": 0.9483500719070435,
"learning_rate": 9.902897863078192e-06,
"loss": 0.5857,
"step": 1436
},
{
"epoch": 0.09,
"grad_norm": 0.9564317464828491,
"learning_rate": 9.902696540283414e-06,
"loss": 0.6902,
"step": 1437
},
{
"epoch": 0.09,
"grad_norm": 0.8706897497177124,
"learning_rate": 9.90249501105314e-06,
"loss": 0.5932,
"step": 1438
},
{
"epoch": 0.09,
"grad_norm": 0.913366436958313,
"learning_rate": 9.902293275395854e-06,
"loss": 0.6527,
"step": 1439
},
{
"epoch": 0.09,
"grad_norm": 0.9667909741401672,
"learning_rate": 9.902091333320053e-06,
"loss": 0.6133,
"step": 1440
},
{
"epoch": 0.09,
"grad_norm": 0.9342280626296997,
"learning_rate": 9.90188918483424e-06,
"loss": 0.6631,
"step": 1441
},
{
"epoch": 0.09,
"grad_norm": 0.9814968705177307,
"learning_rate": 9.901686829946924e-06,
"loss": 0.6715,
"step": 1442
},
{
"epoch": 0.09,
"grad_norm": 0.9427680373191833,
"learning_rate": 9.901484268666628e-06,
"loss": 0.6623,
"step": 1443
},
{
"epoch": 0.09,
"grad_norm": 0.9245345592498779,
"learning_rate": 9.90128150100188e-06,
"loss": 0.7061,
"step": 1444
},
{
"epoch": 0.09,
"grad_norm": 0.9452770948410034,
"learning_rate": 9.90107852696122e-06,
"loss": 0.6333,
"step": 1445
},
{
"epoch": 0.09,
"grad_norm": 1.0394012928009033,
"learning_rate": 9.900875346553192e-06,
"loss": 0.675,
"step": 1446
},
{
"epoch": 0.09,
"grad_norm": 0.9171515107154846,
"learning_rate": 9.900671959786352e-06,
"loss": 0.6535,
"step": 1447
},
{
"epoch": 0.09,
"grad_norm": 0.9841604828834534,
"learning_rate": 9.900468366669264e-06,
"loss": 0.6465,
"step": 1448
},
{
"epoch": 0.09,
"grad_norm": 0.9761607050895691,
"learning_rate": 9.900264567210501e-06,
"loss": 0.6161,
"step": 1449
},
{
"epoch": 0.09,
"grad_norm": 0.8922892808914185,
"learning_rate": 9.900060561418643e-06,
"loss": 0.6091,
"step": 1450
},
{
"epoch": 0.09,
"grad_norm": 0.984734833240509,
"learning_rate": 9.89985634930228e-06,
"loss": 0.6684,
"step": 1451
},
{
"epoch": 0.09,
"grad_norm": 0.9379397630691528,
"learning_rate": 9.899651930870014e-06,
"loss": 0.6442,
"step": 1452
},
{
"epoch": 0.09,
"grad_norm": 0.9366482496261597,
"learning_rate": 9.899447306130447e-06,
"loss": 0.6081,
"step": 1453
},
{
"epoch": 0.09,
"grad_norm": 1.0072358846664429,
"learning_rate": 9.8992424750922e-06,
"loss": 0.6793,
"step": 1454
},
{
"epoch": 0.09,
"grad_norm": 0.9230679869651794,
"learning_rate": 9.899037437763894e-06,
"loss": 0.6299,
"step": 1455
},
{
"epoch": 0.09,
"grad_norm": 0.9557128548622131,
"learning_rate": 9.898832194154165e-06,
"loss": 0.6412,
"step": 1456
},
{
"epoch": 0.09,
"grad_norm": 0.8996137976646423,
"learning_rate": 9.898626744271654e-06,
"loss": 0.5912,
"step": 1457
},
{
"epoch": 0.09,
"grad_norm": 0.9441683292388916,
"learning_rate": 9.898421088125012e-06,
"loss": 0.6139,
"step": 1458
},
{
"epoch": 0.09,
"grad_norm": 0.9505671858787537,
"learning_rate": 9.898215225722899e-06,
"loss": 0.6811,
"step": 1459
},
{
"epoch": 0.09,
"grad_norm": 1.0145865678787231,
"learning_rate": 9.898009157073982e-06,
"loss": 0.6746,
"step": 1460
},
{
"epoch": 0.09,
"grad_norm": 0.9370318651199341,
"learning_rate": 9.897802882186938e-06,
"loss": 0.6384,
"step": 1461
},
{
"epoch": 0.09,
"grad_norm": 0.890518307685852,
"learning_rate": 9.897596401070452e-06,
"loss": 0.6382,
"step": 1462
},
{
"epoch": 0.09,
"grad_norm": 0.8634839653968811,
"learning_rate": 9.89738971373322e-06,
"loss": 0.6107,
"step": 1463
},
{
"epoch": 0.09,
"grad_norm": 0.9820486307144165,
"learning_rate": 9.897182820183944e-06,
"loss": 0.6614,
"step": 1464
},
{
"epoch": 0.09,
"grad_norm": 0.9493029117584229,
"learning_rate": 9.896975720431334e-06,
"loss": 0.6184,
"step": 1465
},
{
"epoch": 0.09,
"grad_norm": 0.9800208210945129,
"learning_rate": 9.896768414484115e-06,
"loss": 0.6639,
"step": 1466
},
{
"epoch": 0.09,
"grad_norm": 0.9691864848136902,
"learning_rate": 9.896560902351009e-06,
"loss": 0.6536,
"step": 1467
},
{
"epoch": 0.09,
"grad_norm": 0.964766263961792,
"learning_rate": 9.89635318404076e-06,
"loss": 0.6796,
"step": 1468
},
{
"epoch": 0.09,
"grad_norm": 0.8742868900299072,
"learning_rate": 9.896145259562111e-06,
"loss": 0.6627,
"step": 1469
},
{
"epoch": 0.09,
"grad_norm": 0.9007435441017151,
"learning_rate": 9.895937128923816e-06,
"loss": 0.6388,
"step": 1470
},
{
"epoch": 0.09,
"grad_norm": 0.931812584400177,
"learning_rate": 9.895728792134642e-06,
"loss": 0.6514,
"step": 1471
},
{
"epoch": 0.09,
"grad_norm": 0.9516260027885437,
"learning_rate": 9.895520249203358e-06,
"loss": 0.6737,
"step": 1472
},
{
"epoch": 0.09,
"grad_norm": 0.9379490613937378,
"learning_rate": 9.895311500138749e-06,
"loss": 0.6273,
"step": 1473
},
{
"epoch": 0.09,
"grad_norm": 0.9107145667076111,
"learning_rate": 9.8951025449496e-06,
"loss": 0.6562,
"step": 1474
},
{
"epoch": 0.09,
"grad_norm": 0.8951176404953003,
"learning_rate": 9.894893383644713e-06,
"loss": 0.5977,
"step": 1475
},
{
"epoch": 0.09,
"grad_norm": 0.9197559356689453,
"learning_rate": 9.894684016232893e-06,
"loss": 0.6614,
"step": 1476
},
{
"epoch": 0.09,
"grad_norm": 0.9731332063674927,
"learning_rate": 9.894474442722956e-06,
"loss": 0.6992,
"step": 1477
},
{
"epoch": 0.09,
"grad_norm": 0.9306113719940186,
"learning_rate": 9.89426466312373e-06,
"loss": 0.6351,
"step": 1478
},
{
"epoch": 0.09,
"grad_norm": 0.9742302298545837,
"learning_rate": 9.89405467744404e-06,
"loss": 0.7297,
"step": 1479
},
{
"epoch": 0.09,
"grad_norm": 0.891633927822113,
"learning_rate": 9.893844485692736e-06,
"loss": 0.6004,
"step": 1480
},
{
"epoch": 0.09,
"grad_norm": 1.000712275505066,
"learning_rate": 9.893634087878665e-06,
"loss": 0.6486,
"step": 1481
},
{
"epoch": 0.09,
"grad_norm": 0.9697219729423523,
"learning_rate": 9.893423484010685e-06,
"loss": 0.6353,
"step": 1482
},
{
"epoch": 0.09,
"grad_norm": 0.9428305625915527,
"learning_rate": 9.893212674097666e-06,
"loss": 0.6327,
"step": 1483
},
{
"epoch": 0.09,
"grad_norm": 0.8775177001953125,
"learning_rate": 9.893001658148482e-06,
"loss": 0.5795,
"step": 1484
},
{
"epoch": 0.09,
"grad_norm": 0.8842799663543701,
"learning_rate": 9.892790436172022e-06,
"loss": 0.6095,
"step": 1485
},
{
"epoch": 0.09,
"grad_norm": 0.9339465498924255,
"learning_rate": 9.892579008177176e-06,
"loss": 0.6589,
"step": 1486
},
{
"epoch": 0.09,
"grad_norm": 0.940681517124176,
"learning_rate": 9.892367374172849e-06,
"loss": 0.7008,
"step": 1487
},
{
"epoch": 0.09,
"grad_norm": 0.942547619342804,
"learning_rate": 9.89215553416795e-06,
"loss": 0.6456,
"step": 1488
},
{
"epoch": 0.09,
"grad_norm": 0.9299596548080444,
"learning_rate": 9.8919434881714e-06,
"loss": 0.6347,
"step": 1489
},
{
"epoch": 0.09,
"grad_norm": 0.9081819653511047,
"learning_rate": 9.891731236192127e-06,
"loss": 0.6871,
"step": 1490
},
{
"epoch": 0.09,
"grad_norm": 0.903308093547821,
"learning_rate": 9.89151877823907e-06,
"loss": 0.644,
"step": 1491
},
{
"epoch": 0.09,
"grad_norm": 0.8742372989654541,
"learning_rate": 9.891306114321175e-06,
"loss": 0.6389,
"step": 1492
},
{
"epoch": 0.09,
"grad_norm": 0.9535795450210571,
"learning_rate": 9.891093244447393e-06,
"loss": 0.6408,
"step": 1493
},
{
"epoch": 0.09,
"grad_norm": 0.9566690921783447,
"learning_rate": 9.890880168626691e-06,
"loss": 0.6521,
"step": 1494
},
{
"epoch": 0.09,
"grad_norm": 0.9142457246780396,
"learning_rate": 9.890666886868038e-06,
"loss": 0.6411,
"step": 1495
},
{
"epoch": 0.09,
"grad_norm": 0.8510489463806152,
"learning_rate": 9.890453399180415e-06,
"loss": 0.6156,
"step": 1496
},
{
"epoch": 0.09,
"grad_norm": 0.9929180145263672,
"learning_rate": 9.890239705572815e-06,
"loss": 0.6782,
"step": 1497
},
{
"epoch": 0.09,
"grad_norm": 0.93791264295578,
"learning_rate": 9.89002580605423e-06,
"loss": 0.6499,
"step": 1498
},
{
"epoch": 0.09,
"grad_norm": 0.9413290619850159,
"learning_rate": 9.88981170063367e-06,
"loss": 0.6317,
"step": 1499
},
{
"epoch": 0.1,
"grad_norm": 0.9057697057723999,
"learning_rate": 9.88959738932015e-06,
"loss": 0.5684,
"step": 1500
},
{
"epoch": 0.1,
"grad_norm": 0.9506174921989441,
"learning_rate": 9.889382872122693e-06,
"loss": 0.7017,
"step": 1501
},
{
"epoch": 0.1,
"grad_norm": 0.9269284605979919,
"learning_rate": 9.889168149050334e-06,
"loss": 0.6496,
"step": 1502
},
{
"epoch": 0.1,
"grad_norm": 0.9708095192909241,
"learning_rate": 9.88895322011211e-06,
"loss": 0.7373,
"step": 1503
},
{
"epoch": 0.1,
"grad_norm": 0.9477187991142273,
"learning_rate": 9.888738085317075e-06,
"loss": 0.7015,
"step": 1504
},
{
"epoch": 0.1,
"grad_norm": 0.8957401514053345,
"learning_rate": 9.888522744674286e-06,
"loss": 0.6327,
"step": 1505
},
{
"epoch": 0.1,
"grad_norm": 0.9387091994285583,
"learning_rate": 9.888307198192808e-06,
"loss": 0.6296,
"step": 1506
},
{
"epoch": 0.1,
"grad_norm": 0.9464743733406067,
"learning_rate": 9.888091445881723e-06,
"loss": 0.6616,
"step": 1507
},
{
"epoch": 0.1,
"grad_norm": 0.944981575012207,
"learning_rate": 9.887875487750108e-06,
"loss": 0.637,
"step": 1508
},
{
"epoch": 0.1,
"grad_norm": 0.9454977512359619,
"learning_rate": 9.887659323807062e-06,
"loss": 0.6645,
"step": 1509
},
{
"epoch": 0.1,
"grad_norm": 0.9209526777267456,
"learning_rate": 9.887442954061684e-06,
"loss": 0.6978,
"step": 1510
},
{
"epoch": 0.1,
"grad_norm": 0.9140705466270447,
"learning_rate": 9.887226378523085e-06,
"loss": 0.6424,
"step": 1511
},
{
"epoch": 0.1,
"grad_norm": 0.9300777316093445,
"learning_rate": 9.887009597200385e-06,
"loss": 0.6293,
"step": 1512
},
{
"epoch": 0.1,
"grad_norm": 0.883039653301239,
"learning_rate": 9.88679261010271e-06,
"loss": 0.561,
"step": 1513
},
{
"epoch": 0.1,
"grad_norm": 0.9039274454116821,
"learning_rate": 9.886575417239202e-06,
"loss": 0.6245,
"step": 1514
},
{
"epoch": 0.1,
"grad_norm": 0.9318472743034363,
"learning_rate": 9.886358018619e-06,
"loss": 0.6637,
"step": 1515
},
{
"epoch": 0.1,
"grad_norm": 0.853915810585022,
"learning_rate": 9.886140414251259e-06,
"loss": 0.6292,
"step": 1516
},
{
"epoch": 0.1,
"grad_norm": 0.996114194393158,
"learning_rate": 9.885922604145143e-06,
"loss": 0.6856,
"step": 1517
},
{
"epoch": 0.1,
"grad_norm": 0.9068061113357544,
"learning_rate": 9.885704588309825e-06,
"loss": 0.6218,
"step": 1518
},
{
"epoch": 0.1,
"grad_norm": 0.9396615624427795,
"learning_rate": 9.885486366754482e-06,
"loss": 0.6889,
"step": 1519
},
{
"epoch": 0.1,
"grad_norm": 0.9767167568206787,
"learning_rate": 9.885267939488303e-06,
"loss": 0.669,
"step": 1520
},
{
"epoch": 0.1,
"grad_norm": 0.9243539571762085,
"learning_rate": 9.885049306520487e-06,
"loss": 0.571,
"step": 1521
},
{
"epoch": 0.1,
"grad_norm": 0.9698777794837952,
"learning_rate": 9.884830467860238e-06,
"loss": 0.6195,
"step": 1522
},
{
"epoch": 0.1,
"grad_norm": 0.9754754900932312,
"learning_rate": 9.88461142351677e-06,
"loss": 0.6712,
"step": 1523
},
{
"epoch": 0.1,
"grad_norm": 0.9276134371757507,
"learning_rate": 9.884392173499308e-06,
"loss": 0.6022,
"step": 1524
},
{
"epoch": 0.1,
"grad_norm": 0.8962921500205994,
"learning_rate": 9.884172717817085e-06,
"loss": 0.6694,
"step": 1525
},
{
"epoch": 0.1,
"grad_norm": 0.9050678610801697,
"learning_rate": 9.883953056479336e-06,
"loss": 0.6422,
"step": 1526
},
{
"epoch": 0.1,
"grad_norm": 1.006984829902649,
"learning_rate": 9.883733189495316e-06,
"loss": 0.6856,
"step": 1527
},
{
"epoch": 0.1,
"grad_norm": 0.9265629053115845,
"learning_rate": 9.88351311687428e-06,
"loss": 0.6231,
"step": 1528
},
{
"epoch": 0.1,
"grad_norm": 0.9328793883323669,
"learning_rate": 9.883292838625495e-06,
"loss": 0.6304,
"step": 1529
},
{
"epoch": 0.1,
"grad_norm": 0.9297760128974915,
"learning_rate": 9.883072354758237e-06,
"loss": 0.6102,
"step": 1530
},
{
"epoch": 0.1,
"grad_norm": 0.9190971851348877,
"learning_rate": 9.88285166528179e-06,
"loss": 0.6895,
"step": 1531
},
{
"epoch": 0.1,
"grad_norm": 0.9351559281349182,
"learning_rate": 9.882630770205444e-06,
"loss": 0.5951,
"step": 1532
},
{
"epoch": 0.1,
"grad_norm": 0.9502492547035217,
"learning_rate": 9.882409669538503e-06,
"loss": 0.6165,
"step": 1533
},
{
"epoch": 0.1,
"grad_norm": 0.9726475477218628,
"learning_rate": 9.882188363290273e-06,
"loss": 0.6672,
"step": 1534
},
{
"epoch": 0.1,
"grad_norm": 1.011664628982544,
"learning_rate": 9.881966851470077e-06,
"loss": 0.6367,
"step": 1535
},
{
"epoch": 0.1,
"grad_norm": 0.9154837727546692,
"learning_rate": 9.881745134087239e-06,
"loss": 0.6487,
"step": 1536
},
{
"epoch": 0.1,
"grad_norm": 0.9829793572425842,
"learning_rate": 9.881523211151097e-06,
"loss": 0.6535,
"step": 1537
},
{
"epoch": 0.1,
"grad_norm": 0.9159083366394043,
"learning_rate": 9.881301082670992e-06,
"loss": 0.6405,
"step": 1538
},
{
"epoch": 0.1,
"grad_norm": 0.9753561615943909,
"learning_rate": 9.881078748656282e-06,
"loss": 0.673,
"step": 1539
},
{
"epoch": 0.1,
"grad_norm": 0.961372971534729,
"learning_rate": 9.880856209116324e-06,
"loss": 0.6218,
"step": 1540
},
{
"epoch": 0.1,
"grad_norm": 0.9532050490379333,
"learning_rate": 9.880633464060492e-06,
"loss": 0.6335,
"step": 1541
},
{
"epoch": 0.1,
"grad_norm": 0.9294744729995728,
"learning_rate": 9.880410513498163e-06,
"loss": 0.6179,
"step": 1542
},
{
"epoch": 0.1,
"grad_norm": 0.979083240032196,
"learning_rate": 9.880187357438722e-06,
"loss": 0.6624,
"step": 1543
},
{
"epoch": 0.1,
"grad_norm": 0.9284359216690063,
"learning_rate": 9.87996399589157e-06,
"loss": 0.646,
"step": 1544
},
{
"epoch": 0.1,
"grad_norm": 0.9217939376831055,
"learning_rate": 9.87974042886611e-06,
"loss": 0.6153,
"step": 1545
},
{
"epoch": 0.1,
"grad_norm": 0.8446288704872131,
"learning_rate": 9.879516656371758e-06,
"loss": 0.5636,
"step": 1546
},
{
"epoch": 0.1,
"grad_norm": 1.0131950378417969,
"learning_rate": 9.879292678417934e-06,
"loss": 0.6842,
"step": 1547
},
{
"epoch": 0.1,
"grad_norm": 0.9436971545219421,
"learning_rate": 9.879068495014068e-06,
"loss": 0.6342,
"step": 1548
},
{
"epoch": 0.1,
"grad_norm": 0.9207556843757629,
"learning_rate": 9.878844106169601e-06,
"loss": 0.717,
"step": 1549
},
{
"epoch": 0.1,
"grad_norm": 0.9688981771469116,
"learning_rate": 9.87861951189398e-06,
"loss": 0.6763,
"step": 1550
},
{
"epoch": 0.1,
"grad_norm": 0.9991617798805237,
"learning_rate": 9.878394712196665e-06,
"loss": 0.6928,
"step": 1551
},
{
"epoch": 0.1,
"grad_norm": 0.9736862182617188,
"learning_rate": 9.878169707087116e-06,
"loss": 0.6552,
"step": 1552
},
{
"epoch": 0.1,
"grad_norm": 0.9318220019340515,
"learning_rate": 9.877944496574813e-06,
"loss": 0.5917,
"step": 1553
},
{
"epoch": 0.1,
"grad_norm": 0.8690041303634644,
"learning_rate": 9.877719080669235e-06,
"loss": 0.6064,
"step": 1554
},
{
"epoch": 0.1,
"grad_norm": 0.9481027126312256,
"learning_rate": 9.877493459379876e-06,
"loss": 0.6604,
"step": 1555
},
{
"epoch": 0.1,
"grad_norm": 1.0152469873428345,
"learning_rate": 9.877267632716235e-06,
"loss": 0.7071,
"step": 1556
},
{
"epoch": 0.1,
"grad_norm": 0.9626147150993347,
"learning_rate": 9.87704160068782e-06,
"loss": 0.623,
"step": 1557
},
{
"epoch": 0.1,
"grad_norm": 0.9506250619888306,
"learning_rate": 9.87681536330415e-06,
"loss": 0.6372,
"step": 1558
},
{
"epoch": 0.1,
"grad_norm": 0.9783592224121094,
"learning_rate": 9.87658892057475e-06,
"loss": 0.6661,
"step": 1559
},
{
"epoch": 0.1,
"grad_norm": 0.9387713670730591,
"learning_rate": 9.876362272509154e-06,
"loss": 0.7131,
"step": 1560
},
{
"epoch": 0.1,
"grad_norm": 0.9006531238555908,
"learning_rate": 9.876135419116908e-06,
"loss": 0.6329,
"step": 1561
},
{
"epoch": 0.1,
"grad_norm": 0.9375502467155457,
"learning_rate": 9.87590836040756e-06,
"loss": 0.6184,
"step": 1562
},
{
"epoch": 0.1,
"grad_norm": 0.9109377264976501,
"learning_rate": 9.875681096390676e-06,
"loss": 0.6317,
"step": 1563
},
{
"epoch": 0.1,
"grad_norm": 0.9256362915039062,
"learning_rate": 9.87545362707582e-06,
"loss": 0.6397,
"step": 1564
},
{
"epoch": 0.1,
"grad_norm": 0.9016781449317932,
"learning_rate": 9.875225952472574e-06,
"loss": 0.6329,
"step": 1565
},
{
"epoch": 0.1,
"grad_norm": 0.9019981026649475,
"learning_rate": 9.874998072590521e-06,
"loss": 0.6723,
"step": 1566
},
{
"epoch": 0.1,
"grad_norm": 0.9814824461936951,
"learning_rate": 9.874769987439259e-06,
"loss": 0.6784,
"step": 1567
},
{
"epoch": 0.1,
"grad_norm": 0.9205242395401001,
"learning_rate": 9.87454169702839e-06,
"loss": 0.6541,
"step": 1568
},
{
"epoch": 0.1,
"grad_norm": 1.0002273321151733,
"learning_rate": 9.87431320136753e-06,
"loss": 0.5924,
"step": 1569
},
{
"epoch": 0.1,
"grad_norm": 0.9376479983329773,
"learning_rate": 9.874084500466295e-06,
"loss": 0.6596,
"step": 1570
},
{
"epoch": 0.1,
"grad_norm": 0.872928261756897,
"learning_rate": 9.873855594334319e-06,
"loss": 0.6838,
"step": 1571
},
{
"epoch": 0.1,
"grad_norm": 0.902869701385498,
"learning_rate": 9.873626482981238e-06,
"loss": 0.6284,
"step": 1572
},
{
"epoch": 0.1,
"grad_norm": 0.9037356972694397,
"learning_rate": 9.873397166416698e-06,
"loss": 0.6083,
"step": 1573
},
{
"epoch": 0.1,
"grad_norm": 0.8765510320663452,
"learning_rate": 9.87316764465036e-06,
"loss": 0.6122,
"step": 1574
},
{
"epoch": 0.1,
"grad_norm": 0.9921714067459106,
"learning_rate": 9.872937917691883e-06,
"loss": 0.5799,
"step": 1575
},
{
"epoch": 0.1,
"grad_norm": 0.9323515295982361,
"learning_rate": 9.872707985550942e-06,
"loss": 0.5727,
"step": 1576
},
{
"epoch": 0.1,
"grad_norm": 0.9624417424201965,
"learning_rate": 9.872477848237221e-06,
"loss": 0.6477,
"step": 1577
},
{
"epoch": 0.1,
"grad_norm": 0.9209104180335999,
"learning_rate": 9.872247505760405e-06,
"loss": 0.6059,
"step": 1578
},
{
"epoch": 0.1,
"grad_norm": 0.9874113202095032,
"learning_rate": 9.872016958130197e-06,
"loss": 0.6308,
"step": 1579
},
{
"epoch": 0.1,
"grad_norm": 0.981163740158081,
"learning_rate": 9.871786205356303e-06,
"loss": 0.6446,
"step": 1580
},
{
"epoch": 0.1,
"grad_norm": 0.9238435626029968,
"learning_rate": 9.871555247448442e-06,
"loss": 0.6831,
"step": 1581
},
{
"epoch": 0.1,
"grad_norm": 1.0302647352218628,
"learning_rate": 9.871324084416332e-06,
"loss": 0.6095,
"step": 1582
},
{
"epoch": 0.1,
"grad_norm": 0.9587137699127197,
"learning_rate": 9.871092716269714e-06,
"loss": 0.6613,
"step": 1583
},
{
"epoch": 0.1,
"grad_norm": 0.9017694592475891,
"learning_rate": 9.870861143018327e-06,
"loss": 0.6368,
"step": 1584
},
{
"epoch": 0.1,
"grad_norm": 0.9232084155082703,
"learning_rate": 9.87062936467192e-06,
"loss": 0.6588,
"step": 1585
},
{
"epoch": 0.1,
"grad_norm": 0.9299889206886292,
"learning_rate": 9.870397381240256e-06,
"loss": 0.6222,
"step": 1586
},
{
"epoch": 0.1,
"grad_norm": 0.8801997900009155,
"learning_rate": 9.870165192733101e-06,
"loss": 0.6371,
"step": 1587
},
{
"epoch": 0.1,
"grad_norm": 0.9414759278297424,
"learning_rate": 9.869932799160232e-06,
"loss": 0.6735,
"step": 1588
},
{
"epoch": 0.1,
"grad_norm": 1.5504239797592163,
"learning_rate": 9.869700200531431e-06,
"loss": 0.6738,
"step": 1589
},
{
"epoch": 0.1,
"grad_norm": 0.9341895580291748,
"learning_rate": 9.869467396856499e-06,
"loss": 0.6024,
"step": 1590
},
{
"epoch": 0.1,
"grad_norm": 0.921317994594574,
"learning_rate": 9.869234388145232e-06,
"loss": 0.6963,
"step": 1591
},
{
"epoch": 0.1,
"grad_norm": 0.9685119986534119,
"learning_rate": 9.869001174407444e-06,
"loss": 0.5984,
"step": 1592
},
{
"epoch": 0.1,
"grad_norm": 0.8759018182754517,
"learning_rate": 9.868767755652955e-06,
"loss": 0.6223,
"step": 1593
},
{
"epoch": 0.1,
"grad_norm": 0.8878785371780396,
"learning_rate": 9.868534131891594e-06,
"loss": 0.6196,
"step": 1594
},
{
"epoch": 0.1,
"grad_norm": 0.9563702344894409,
"learning_rate": 9.868300303133195e-06,
"loss": 0.6902,
"step": 1595
},
{
"epoch": 0.1,
"grad_norm": 0.9496309757232666,
"learning_rate": 9.868066269387609e-06,
"loss": 0.6131,
"step": 1596
},
{
"epoch": 0.1,
"grad_norm": 0.9410830140113831,
"learning_rate": 9.867832030664685e-06,
"loss": 0.6433,
"step": 1597
},
{
"epoch": 0.1,
"grad_norm": 1.0077545642852783,
"learning_rate": 9.867597586974288e-06,
"loss": 0.6728,
"step": 1598
},
{
"epoch": 0.1,
"grad_norm": 0.9375026226043701,
"learning_rate": 9.86736293832629e-06,
"loss": 0.6316,
"step": 1599
},
{
"epoch": 0.1,
"grad_norm": 0.9416118264198303,
"learning_rate": 9.86712808473057e-06,
"loss": 0.662,
"step": 1600
},
{
"epoch": 0.1,
"grad_norm": 0.9092760682106018,
"learning_rate": 9.86689302619702e-06,
"loss": 0.6317,
"step": 1601
},
{
"epoch": 0.1,
"grad_norm": 0.9220471978187561,
"learning_rate": 9.866657762735534e-06,
"loss": 0.6576,
"step": 1602
},
{
"epoch": 0.1,
"grad_norm": 0.9349024295806885,
"learning_rate": 9.866422294356019e-06,
"loss": 0.6976,
"step": 1603
},
{
"epoch": 0.1,
"grad_norm": 0.8267975449562073,
"learning_rate": 9.866186621068391e-06,
"loss": 0.5685,
"step": 1604
},
{
"epoch": 0.1,
"grad_norm": 0.9613746404647827,
"learning_rate": 9.865950742882574e-06,
"loss": 0.6038,
"step": 1605
},
{
"epoch": 0.1,
"grad_norm": 0.9852586984634399,
"learning_rate": 9.865714659808497e-06,
"loss": 0.6429,
"step": 1606
},
{
"epoch": 0.1,
"grad_norm": 0.9281002283096313,
"learning_rate": 9.865478371856102e-06,
"loss": 0.6473,
"step": 1607
},
{
"epoch": 0.1,
"grad_norm": 0.8990695476531982,
"learning_rate": 9.86524187903534e-06,
"loss": 0.6667,
"step": 1608
},
{
"epoch": 0.1,
"grad_norm": 0.9048877358436584,
"learning_rate": 9.865005181356166e-06,
"loss": 0.6437,
"step": 1609
},
{
"epoch": 0.1,
"grad_norm": 0.957685649394989,
"learning_rate": 9.864768278828548e-06,
"loss": 0.6228,
"step": 1610
},
{
"epoch": 0.1,
"grad_norm": 0.8602043986320496,
"learning_rate": 9.864531171462462e-06,
"loss": 0.5928,
"step": 1611
},
{
"epoch": 0.1,
"grad_norm": 0.9182524085044861,
"learning_rate": 9.86429385926789e-06,
"loss": 0.6859,
"step": 1612
},
{
"epoch": 0.1,
"grad_norm": 0.9720632433891296,
"learning_rate": 9.864056342254827e-06,
"loss": 0.6562,
"step": 1613
},
{
"epoch": 0.1,
"grad_norm": 0.9607463479042053,
"learning_rate": 9.86381862043327e-06,
"loss": 0.6783,
"step": 1614
},
{
"epoch": 0.1,
"grad_norm": 0.9194375276565552,
"learning_rate": 9.863580693813232e-06,
"loss": 0.6433,
"step": 1615
},
{
"epoch": 0.1,
"grad_norm": 0.9061447381973267,
"learning_rate": 9.86334256240473e-06,
"loss": 0.6577,
"step": 1616
},
{
"epoch": 0.1,
"grad_norm": 0.911880373954773,
"learning_rate": 9.86310422621779e-06,
"loss": 0.6437,
"step": 1617
},
{
"epoch": 0.1,
"grad_norm": 0.9014673233032227,
"learning_rate": 9.86286568526245e-06,
"loss": 0.6688,
"step": 1618
},
{
"epoch": 0.1,
"grad_norm": 0.8759530782699585,
"learning_rate": 9.862626939548751e-06,
"loss": 0.6889,
"step": 1619
},
{
"epoch": 0.1,
"grad_norm": 0.8872689008712769,
"learning_rate": 9.862387989086749e-06,
"loss": 0.6351,
"step": 1620
},
{
"epoch": 0.1,
"grad_norm": 0.912520706653595,
"learning_rate": 9.862148833886504e-06,
"loss": 0.6573,
"step": 1621
},
{
"epoch": 0.1,
"grad_norm": 0.935406506061554,
"learning_rate": 9.861909473958084e-06,
"loss": 0.6349,
"step": 1622
},
{
"epoch": 0.1,
"grad_norm": 0.9377623796463013,
"learning_rate": 9.861669909311571e-06,
"loss": 0.6324,
"step": 1623
},
{
"epoch": 0.1,
"grad_norm": 0.8664435744285583,
"learning_rate": 9.861430139957052e-06,
"loss": 0.6517,
"step": 1624
},
{
"epoch": 0.1,
"grad_norm": 0.9497208595275879,
"learning_rate": 9.861190165904617e-06,
"loss": 0.6703,
"step": 1625
},
{
"epoch": 0.1,
"grad_norm": 0.9303921461105347,
"learning_rate": 9.860949987164379e-06,
"loss": 0.611,
"step": 1626
},
{
"epoch": 0.1,
"grad_norm": 0.944831371307373,
"learning_rate": 9.860709603746445e-06,
"loss": 0.6534,
"step": 1627
},
{
"epoch": 0.1,
"grad_norm": 0.9013164043426514,
"learning_rate": 9.86046901566094e-06,
"loss": 0.6457,
"step": 1628
},
{
"epoch": 0.1,
"grad_norm": 0.9437874555587769,
"learning_rate": 9.860228222917992e-06,
"loss": 0.6238,
"step": 1629
},
{
"epoch": 0.1,
"grad_norm": 0.901542067527771,
"learning_rate": 9.859987225527742e-06,
"loss": 0.6299,
"step": 1630
},
{
"epoch": 0.1,
"grad_norm": 0.963375449180603,
"learning_rate": 9.859746023500337e-06,
"loss": 0.6798,
"step": 1631
},
{
"epoch": 0.1,
"grad_norm": 0.9021002054214478,
"learning_rate": 9.85950461684593e-06,
"loss": 0.6386,
"step": 1632
},
{
"epoch": 0.1,
"grad_norm": 0.932859480381012,
"learning_rate": 9.85926300557469e-06,
"loss": 0.6516,
"step": 1633
},
{
"epoch": 0.1,
"grad_norm": 0.8896989822387695,
"learning_rate": 9.85902118969679e-06,
"loss": 0.6372,
"step": 1634
},
{
"epoch": 0.1,
"grad_norm": 0.9466985464096069,
"learning_rate": 9.85877916922241e-06,
"loss": 0.6244,
"step": 1635
},
{
"epoch": 0.1,
"grad_norm": 0.9208292961120605,
"learning_rate": 9.858536944161743e-06,
"loss": 0.6742,
"step": 1636
},
{
"epoch": 0.1,
"grad_norm": 0.9316291213035583,
"learning_rate": 9.858294514524987e-06,
"loss": 0.6306,
"step": 1637
},
{
"epoch": 0.1,
"grad_norm": 0.9085369110107422,
"learning_rate": 9.858051880322347e-06,
"loss": 0.5967,
"step": 1638
},
{
"epoch": 0.1,
"grad_norm": 0.9222848415374756,
"learning_rate": 9.857809041564044e-06,
"loss": 0.656,
"step": 1639
},
{
"epoch": 0.1,
"grad_norm": 0.9137614369392395,
"learning_rate": 9.857565998260302e-06,
"loss": 0.6778,
"step": 1640
},
{
"epoch": 0.1,
"grad_norm": 0.8836297392845154,
"learning_rate": 9.857322750421353e-06,
"loss": 0.6172,
"step": 1641
},
{
"epoch": 0.1,
"grad_norm": 0.9377101063728333,
"learning_rate": 9.857079298057442e-06,
"loss": 0.6562,
"step": 1642
},
{
"epoch": 0.1,
"grad_norm": 0.938580334186554,
"learning_rate": 9.856835641178816e-06,
"loss": 0.6937,
"step": 1643
},
{
"epoch": 0.1,
"grad_norm": 0.9680647253990173,
"learning_rate": 9.856591779795738e-06,
"loss": 0.6493,
"step": 1644
},
{
"epoch": 0.1,
"grad_norm": 0.9074171781539917,
"learning_rate": 9.856347713918475e-06,
"loss": 0.6752,
"step": 1645
},
{
"epoch": 0.1,
"grad_norm": 0.8547381162643433,
"learning_rate": 9.856103443557304e-06,
"loss": 0.623,
"step": 1646
},
{
"epoch": 0.1,
"grad_norm": 1.0403729677200317,
"learning_rate": 9.85585896872251e-06,
"loss": 0.6593,
"step": 1647
},
{
"epoch": 0.1,
"grad_norm": 0.9444959163665771,
"learning_rate": 9.855614289424386e-06,
"loss": 0.634,
"step": 1648
},
{
"epoch": 0.1,
"grad_norm": 0.9254828095436096,
"learning_rate": 9.855369405673236e-06,
"loss": 0.5757,
"step": 1649
},
{
"epoch": 0.1,
"grad_norm": 0.9265499711036682,
"learning_rate": 9.855124317479372e-06,
"loss": 0.6326,
"step": 1650
},
{
"epoch": 0.1,
"grad_norm": 0.9064761400222778,
"learning_rate": 9.854879024853113e-06,
"loss": 0.6488,
"step": 1651
},
{
"epoch": 0.1,
"grad_norm": 0.9563080072402954,
"learning_rate": 9.854633527804787e-06,
"loss": 0.642,
"step": 1652
},
{
"epoch": 0.1,
"grad_norm": 0.838525116443634,
"learning_rate": 9.85438782634473e-06,
"loss": 0.5696,
"step": 1653
},
{
"epoch": 0.1,
"grad_norm": 0.8792423009872437,
"learning_rate": 9.854141920483289e-06,
"loss": 0.6282,
"step": 1654
},
{
"epoch": 0.1,
"grad_norm": 0.9897140264511108,
"learning_rate": 9.853895810230818e-06,
"loss": 0.6317,
"step": 1655
},
{
"epoch": 0.1,
"grad_norm": 0.9357428550720215,
"learning_rate": 9.853649495597682e-06,
"loss": 0.691,
"step": 1656
},
{
"epoch": 0.1,
"grad_norm": 0.8924740552902222,
"learning_rate": 9.853402976594248e-06,
"loss": 0.6754,
"step": 1657
},
{
"epoch": 0.11,
"grad_norm": 0.9512656331062317,
"learning_rate": 9.8531562532309e-06,
"loss": 0.6218,
"step": 1658
},
{
"epoch": 0.11,
"grad_norm": 0.9587389826774597,
"learning_rate": 9.852909325518022e-06,
"loss": 0.6707,
"step": 1659
},
{
"epoch": 0.11,
"grad_norm": 0.9361017942428589,
"learning_rate": 9.852662193466019e-06,
"loss": 0.6475,
"step": 1660
},
{
"epoch": 0.11,
"grad_norm": 0.9150497913360596,
"learning_rate": 9.852414857085288e-06,
"loss": 0.7143,
"step": 1661
},
{
"epoch": 0.11,
"grad_norm": 0.9618809223175049,
"learning_rate": 9.85216731638625e-06,
"loss": 0.646,
"step": 1662
},
{
"epoch": 0.11,
"grad_norm": 0.8974446654319763,
"learning_rate": 9.851919571379326e-06,
"loss": 0.6958,
"step": 1663
},
{
"epoch": 0.11,
"grad_norm": 0.9085642099380493,
"learning_rate": 9.851671622074947e-06,
"loss": 0.6291,
"step": 1664
},
{
"epoch": 0.11,
"grad_norm": 0.9605396389961243,
"learning_rate": 9.851423468483554e-06,
"loss": 0.669,
"step": 1665
},
{
"epoch": 0.11,
"grad_norm": 1.0041121244430542,
"learning_rate": 9.851175110615594e-06,
"loss": 0.5982,
"step": 1666
},
{
"epoch": 0.11,
"grad_norm": 0.9205458164215088,
"learning_rate": 9.850926548481528e-06,
"loss": 0.6587,
"step": 1667
},
{
"epoch": 0.11,
"grad_norm": 0.9921442270278931,
"learning_rate": 9.850677782091818e-06,
"loss": 0.6505,
"step": 1668
},
{
"epoch": 0.11,
"grad_norm": 0.9092791080474854,
"learning_rate": 9.850428811456943e-06,
"loss": 0.5881,
"step": 1669
},
{
"epoch": 0.11,
"grad_norm": 1.0009846687316895,
"learning_rate": 9.850179636587383e-06,
"loss": 0.657,
"step": 1670
},
{
"epoch": 0.11,
"grad_norm": 0.9284378886222839,
"learning_rate": 9.849930257493632e-06,
"loss": 0.616,
"step": 1671
},
{
"epoch": 0.11,
"grad_norm": 1.0465761423110962,
"learning_rate": 9.849680674186188e-06,
"loss": 0.7,
"step": 1672
},
{
"epoch": 0.11,
"grad_norm": 0.9236946105957031,
"learning_rate": 9.849430886675564e-06,
"loss": 0.6498,
"step": 1673
},
{
"epoch": 0.11,
"grad_norm": 0.8840457201004028,
"learning_rate": 9.849180894972272e-06,
"loss": 0.6517,
"step": 1674
},
{
"epoch": 0.11,
"grad_norm": 0.8201990723609924,
"learning_rate": 9.848930699086846e-06,
"loss": 0.6403,
"step": 1675
},
{
"epoch": 0.11,
"grad_norm": 0.9330858588218689,
"learning_rate": 9.848680299029813e-06,
"loss": 0.6374,
"step": 1676
},
{
"epoch": 0.11,
"grad_norm": 0.9151015877723694,
"learning_rate": 9.848429694811721e-06,
"loss": 0.5886,
"step": 1677
},
{
"epoch": 0.11,
"grad_norm": 0.9654482007026672,
"learning_rate": 9.84817888644312e-06,
"loss": 0.6554,
"step": 1678
},
{
"epoch": 0.11,
"grad_norm": 0.9523151516914368,
"learning_rate": 9.847927873934573e-06,
"loss": 0.6361,
"step": 1679
},
{
"epoch": 0.11,
"grad_norm": 0.9912353157997131,
"learning_rate": 9.847676657296647e-06,
"loss": 0.6584,
"step": 1680
},
{
"epoch": 0.11,
"grad_norm": 0.937496542930603,
"learning_rate": 9.847425236539922e-06,
"loss": 0.6502,
"step": 1681
},
{
"epoch": 0.11,
"grad_norm": 0.8653977513313293,
"learning_rate": 9.847173611674982e-06,
"loss": 0.605,
"step": 1682
},
{
"epoch": 0.11,
"grad_norm": 0.9031038880348206,
"learning_rate": 9.846921782712424e-06,
"loss": 0.6144,
"step": 1683
},
{
"epoch": 0.11,
"grad_norm": 0.9280396699905396,
"learning_rate": 9.846669749662851e-06,
"loss": 0.615,
"step": 1684
},
{
"epoch": 0.11,
"grad_norm": 0.9092390537261963,
"learning_rate": 9.846417512536874e-06,
"loss": 0.6235,
"step": 1685
},
{
"epoch": 0.11,
"grad_norm": 0.9543402791023254,
"learning_rate": 9.846165071345118e-06,
"loss": 0.6555,
"step": 1686
},
{
"epoch": 0.11,
"grad_norm": 1.045227289199829,
"learning_rate": 9.845912426098206e-06,
"loss": 0.6761,
"step": 1687
},
{
"epoch": 0.11,
"grad_norm": 0.9226247072219849,
"learning_rate": 9.845659576806781e-06,
"loss": 0.6211,
"step": 1688
},
{
"epoch": 0.11,
"grad_norm": 0.9279161691665649,
"learning_rate": 9.845406523481488e-06,
"loss": 0.6579,
"step": 1689
},
{
"epoch": 0.11,
"grad_norm": 0.91354900598526,
"learning_rate": 9.845153266132981e-06,
"loss": 0.6024,
"step": 1690
},
{
"epoch": 0.11,
"grad_norm": 0.9093358516693115,
"learning_rate": 9.844899804771927e-06,
"loss": 0.6232,
"step": 1691
},
{
"epoch": 0.11,
"grad_norm": 0.9965054988861084,
"learning_rate": 9.844646139408995e-06,
"loss": 0.6429,
"step": 1692
},
{
"epoch": 0.11,
"grad_norm": 0.9603714346885681,
"learning_rate": 9.844392270054868e-06,
"loss": 0.622,
"step": 1693
},
{
"epoch": 0.11,
"grad_norm": 0.9581913948059082,
"learning_rate": 9.844138196720236e-06,
"loss": 0.6845,
"step": 1694
},
{
"epoch": 0.11,
"grad_norm": 0.911685585975647,
"learning_rate": 9.843883919415795e-06,
"loss": 0.685,
"step": 1695
},
{
"epoch": 0.11,
"grad_norm": 0.9244683980941772,
"learning_rate": 9.843629438152252e-06,
"loss": 0.6441,
"step": 1696
},
{
"epoch": 0.11,
"grad_norm": 0.9643012285232544,
"learning_rate": 9.843374752940323e-06,
"loss": 0.6346,
"step": 1697
},
{
"epoch": 0.11,
"grad_norm": 0.9334665536880493,
"learning_rate": 9.843119863790733e-06,
"loss": 0.7161,
"step": 1698
},
{
"epoch": 0.11,
"grad_norm": 0.9710047245025635,
"learning_rate": 9.842864770714213e-06,
"loss": 0.6233,
"step": 1699
},
{
"epoch": 0.11,
"grad_norm": 0.9540897607803345,
"learning_rate": 9.842609473721505e-06,
"loss": 0.6271,
"step": 1700
},
{
"epoch": 0.11,
"grad_norm": 0.9325253367424011,
"learning_rate": 9.842353972823358e-06,
"loss": 0.6153,
"step": 1701
},
{
"epoch": 0.11,
"grad_norm": 0.8942682147026062,
"learning_rate": 9.842098268030532e-06,
"loss": 0.5922,
"step": 1702
},
{
"epoch": 0.11,
"grad_norm": 0.930939793586731,
"learning_rate": 9.84184235935379e-06,
"loss": 0.6366,
"step": 1703
},
{
"epoch": 0.11,
"grad_norm": 0.9117228984832764,
"learning_rate": 9.84158624680391e-06,
"loss": 0.647,
"step": 1704
},
{
"epoch": 0.11,
"grad_norm": 0.9259521961212158,
"learning_rate": 9.841329930391678e-06,
"loss": 0.6384,
"step": 1705
},
{
"epoch": 0.11,
"grad_norm": 0.867400050163269,
"learning_rate": 9.841073410127884e-06,
"loss": 0.6741,
"step": 1706
},
{
"epoch": 0.11,
"grad_norm": 1.0280332565307617,
"learning_rate": 9.840816686023329e-06,
"loss": 0.64,
"step": 1707
},
{
"epoch": 0.11,
"grad_norm": 0.9076325297355652,
"learning_rate": 9.840559758088821e-06,
"loss": 0.5936,
"step": 1708
},
{
"epoch": 0.11,
"grad_norm": 0.9110800623893738,
"learning_rate": 9.840302626335182e-06,
"loss": 0.6145,
"step": 1709
},
{
"epoch": 0.11,
"grad_norm": 0.8760718107223511,
"learning_rate": 9.84004529077324e-06,
"loss": 0.6375,
"step": 1710
},
{
"epoch": 0.11,
"grad_norm": 0.9137043356895447,
"learning_rate": 9.839787751413825e-06,
"loss": 0.6016,
"step": 1711
},
{
"epoch": 0.11,
"grad_norm": 0.8688681125640869,
"learning_rate": 9.839530008267785e-06,
"loss": 0.6208,
"step": 1712
},
{
"epoch": 0.11,
"grad_norm": 0.9339778423309326,
"learning_rate": 9.839272061345974e-06,
"loss": 0.6514,
"step": 1713
},
{
"epoch": 0.11,
"grad_norm": 0.9327898025512695,
"learning_rate": 9.839013910659249e-06,
"loss": 0.6528,
"step": 1714
},
{
"epoch": 0.11,
"grad_norm": 0.9317489266395569,
"learning_rate": 9.838755556218483e-06,
"loss": 0.6433,
"step": 1715
},
{
"epoch": 0.11,
"grad_norm": 0.9479151368141174,
"learning_rate": 9.838496998034552e-06,
"loss": 0.692,
"step": 1716
},
{
"epoch": 0.11,
"grad_norm": 0.9237775802612305,
"learning_rate": 9.838238236118344e-06,
"loss": 0.621,
"step": 1717
},
{
"epoch": 0.11,
"grad_norm": 0.9310511946678162,
"learning_rate": 9.837979270480758e-06,
"loss": 0.6333,
"step": 1718
},
{
"epoch": 0.11,
"grad_norm": 0.877641499042511,
"learning_rate": 9.837720101132692e-06,
"loss": 0.6943,
"step": 1719
},
{
"epoch": 0.11,
"grad_norm": 0.9281149506568909,
"learning_rate": 9.837460728085062e-06,
"loss": 0.6446,
"step": 1720
},
{
"epoch": 0.11,
"grad_norm": 0.9767260551452637,
"learning_rate": 9.83720115134879e-06,
"loss": 0.7114,
"step": 1721
},
{
"epoch": 0.11,
"grad_norm": 0.9054911732673645,
"learning_rate": 9.836941370934806e-06,
"loss": 0.6143,
"step": 1722
},
{
"epoch": 0.11,
"grad_norm": 0.9925005435943604,
"learning_rate": 9.836681386854045e-06,
"loss": 0.6386,
"step": 1723
},
{
"epoch": 0.11,
"grad_norm": 0.9308101534843445,
"learning_rate": 9.836421199117456e-06,
"loss": 0.6501,
"step": 1724
},
{
"epoch": 0.11,
"grad_norm": 0.9074007868766785,
"learning_rate": 9.836160807735997e-06,
"loss": 0.6792,
"step": 1725
},
{
"epoch": 0.11,
"grad_norm": 0.9303346276283264,
"learning_rate": 9.83590021272063e-06,
"loss": 0.6218,
"step": 1726
},
{
"epoch": 0.11,
"grad_norm": 0.9566819667816162,
"learning_rate": 9.835639414082327e-06,
"loss": 0.6525,
"step": 1727
},
{
"epoch": 0.11,
"grad_norm": 0.9563994407653809,
"learning_rate": 9.83537841183207e-06,
"loss": 0.6473,
"step": 1728
},
{
"epoch": 0.11,
"grad_norm": 0.9133448600769043,
"learning_rate": 9.83511720598085e-06,
"loss": 0.61,
"step": 1729
},
{
"epoch": 0.11,
"grad_norm": 0.9543222784996033,
"learning_rate": 9.834855796539665e-06,
"loss": 0.614,
"step": 1730
},
{
"epoch": 0.11,
"grad_norm": 0.9356175661087036,
"learning_rate": 9.834594183519521e-06,
"loss": 0.6181,
"step": 1731
},
{
"epoch": 0.11,
"grad_norm": 0.9626755118370056,
"learning_rate": 9.834332366931435e-06,
"loss": 0.6355,
"step": 1732
},
{
"epoch": 0.11,
"grad_norm": 0.9340695142745972,
"learning_rate": 9.834070346786428e-06,
"loss": 0.6235,
"step": 1733
},
{
"epoch": 0.11,
"grad_norm": 0.916644811630249,
"learning_rate": 9.833808123095538e-06,
"loss": 0.6401,
"step": 1734
},
{
"epoch": 0.11,
"grad_norm": 0.9744462370872498,
"learning_rate": 9.833545695869802e-06,
"loss": 0.6916,
"step": 1735
},
{
"epoch": 0.11,
"grad_norm": 0.9321338534355164,
"learning_rate": 9.833283065120272e-06,
"loss": 0.6363,
"step": 1736
},
{
"epoch": 0.11,
"grad_norm": 0.9485877752304077,
"learning_rate": 9.833020230858005e-06,
"loss": 0.6865,
"step": 1737
},
{
"epoch": 0.11,
"grad_norm": 0.8846791982650757,
"learning_rate": 9.832757193094072e-06,
"loss": 0.6522,
"step": 1738
},
{
"epoch": 0.11,
"grad_norm": 0.8400406837463379,
"learning_rate": 9.832493951839541e-06,
"loss": 0.626,
"step": 1739
},
{
"epoch": 0.11,
"grad_norm": 0.9057971239089966,
"learning_rate": 9.832230507105504e-06,
"loss": 0.6248,
"step": 1740
},
{
"epoch": 0.11,
"grad_norm": 0.9419105052947998,
"learning_rate": 9.831966858903049e-06,
"loss": 0.6535,
"step": 1741
},
{
"epoch": 0.11,
"grad_norm": 0.8836336135864258,
"learning_rate": 9.83170300724328e-06,
"loss": 0.6482,
"step": 1742
},
{
"epoch": 0.11,
"grad_norm": 0.861971378326416,
"learning_rate": 9.831438952137304e-06,
"loss": 0.6039,
"step": 1743
},
{
"epoch": 0.11,
"grad_norm": 0.8943654298782349,
"learning_rate": 9.831174693596241e-06,
"loss": 0.6038,
"step": 1744
},
{
"epoch": 0.11,
"grad_norm": 0.9814664721488953,
"learning_rate": 9.83091023163122e-06,
"loss": 0.6729,
"step": 1745
},
{
"epoch": 0.11,
"grad_norm": 0.8936158418655396,
"learning_rate": 9.830645566253374e-06,
"loss": 0.6335,
"step": 1746
},
{
"epoch": 0.11,
"grad_norm": 0.9102863073348999,
"learning_rate": 9.830380697473848e-06,
"loss": 0.6611,
"step": 1747
},
{
"epoch": 0.11,
"grad_norm": 0.9278464913368225,
"learning_rate": 9.830115625303793e-06,
"loss": 0.6865,
"step": 1748
},
{
"epoch": 0.11,
"grad_norm": 0.921346127986908,
"learning_rate": 9.829850349754373e-06,
"loss": 0.6441,
"step": 1749
},
{
"epoch": 0.11,
"grad_norm": 0.9085983037948608,
"learning_rate": 9.829584870836756e-06,
"loss": 0.6905,
"step": 1750
},
{
"epoch": 0.11,
"grad_norm": 0.8647844195365906,
"learning_rate": 9.82931918856212e-06,
"loss": 0.6083,
"step": 1751
},
{
"epoch": 0.11,
"grad_norm": 0.9357777237892151,
"learning_rate": 9.829053302941656e-06,
"loss": 0.6546,
"step": 1752
},
{
"epoch": 0.11,
"grad_norm": 0.9371474981307983,
"learning_rate": 9.828787213986554e-06,
"loss": 0.6826,
"step": 1753
},
{
"epoch": 0.11,
"grad_norm": 0.8982768654823303,
"learning_rate": 9.82852092170802e-06,
"loss": 0.6184,
"step": 1754
},
{
"epoch": 0.11,
"grad_norm": 0.8816835284233093,
"learning_rate": 9.82825442611727e-06,
"loss": 0.6204,
"step": 1755
},
{
"epoch": 0.11,
"grad_norm": 1.0181394815444946,
"learning_rate": 9.82798772722552e-06,
"loss": 0.6937,
"step": 1756
},
{
"epoch": 0.11,
"grad_norm": 0.9692837595939636,
"learning_rate": 9.827720825044003e-06,
"loss": 0.6443,
"step": 1757
},
{
"epoch": 0.11,
"grad_norm": 0.9221457242965698,
"learning_rate": 9.827453719583957e-06,
"loss": 0.6109,
"step": 1758
},
{
"epoch": 0.11,
"grad_norm": 0.8878170847892761,
"learning_rate": 9.827186410856627e-06,
"loss": 0.5887,
"step": 1759
},
{
"epoch": 0.11,
"grad_norm": 0.9767280220985413,
"learning_rate": 9.82691889887327e-06,
"loss": 0.5916,
"step": 1760
},
{
"epoch": 0.11,
"grad_norm": 0.9061947464942932,
"learning_rate": 9.82665118364515e-06,
"loss": 0.6084,
"step": 1761
},
{
"epoch": 0.11,
"grad_norm": 0.9700713753700256,
"learning_rate": 9.82638326518354e-06,
"loss": 0.6003,
"step": 1762
},
{
"epoch": 0.11,
"grad_norm": 0.977722704410553,
"learning_rate": 9.826115143499721e-06,
"loss": 0.6788,
"step": 1763
},
{
"epoch": 0.11,
"grad_norm": 0.9503071904182434,
"learning_rate": 9.82584681860498e-06,
"loss": 0.6288,
"step": 1764
},
{
"epoch": 0.11,
"grad_norm": 0.9503450393676758,
"learning_rate": 9.82557829051062e-06,
"loss": 0.6407,
"step": 1765
},
{
"epoch": 0.11,
"grad_norm": 1.0136359930038452,
"learning_rate": 9.825309559227944e-06,
"loss": 0.7054,
"step": 1766
},
{
"epoch": 0.11,
"grad_norm": 1.0087224245071411,
"learning_rate": 9.825040624768267e-06,
"loss": 0.6528,
"step": 1767
},
{
"epoch": 0.11,
"grad_norm": 0.9373607039451599,
"learning_rate": 9.824771487142917e-06,
"loss": 0.6851,
"step": 1768
},
{
"epoch": 0.11,
"grad_norm": 0.863404393196106,
"learning_rate": 9.824502146363222e-06,
"loss": 0.6083,
"step": 1769
},
{
"epoch": 0.11,
"grad_norm": 0.9172773361206055,
"learning_rate": 9.824232602440524e-06,
"loss": 0.647,
"step": 1770
},
{
"epoch": 0.11,
"grad_norm": 0.8769250512123108,
"learning_rate": 9.823962855386175e-06,
"loss": 0.6657,
"step": 1771
},
{
"epoch": 0.11,
"grad_norm": 0.9721053838729858,
"learning_rate": 9.823692905211533e-06,
"loss": 0.5903,
"step": 1772
},
{
"epoch": 0.11,
"grad_norm": 0.9308158159255981,
"learning_rate": 9.823422751927961e-06,
"loss": 0.6218,
"step": 1773
},
{
"epoch": 0.11,
"grad_norm": 0.9058137536048889,
"learning_rate": 9.823152395546836e-06,
"loss": 0.6584,
"step": 1774
},
{
"epoch": 0.11,
"grad_norm": 0.9022964239120483,
"learning_rate": 9.822881836079543e-06,
"loss": 0.6114,
"step": 1775
},
{
"epoch": 0.11,
"grad_norm": 0.9127461910247803,
"learning_rate": 9.822611073537474e-06,
"loss": 0.653,
"step": 1776
},
{
"epoch": 0.11,
"grad_norm": 0.9295786023139954,
"learning_rate": 9.822340107932028e-06,
"loss": 0.569,
"step": 1777
},
{
"epoch": 0.11,
"grad_norm": 0.945152759552002,
"learning_rate": 9.822068939274616e-06,
"loss": 0.6499,
"step": 1778
},
{
"epoch": 0.11,
"grad_norm": 0.9397128224372864,
"learning_rate": 9.821797567576656e-06,
"loss": 0.6069,
"step": 1779
},
{
"epoch": 0.11,
"grad_norm": 1.0003842115402222,
"learning_rate": 9.821525992849575e-06,
"loss": 0.6875,
"step": 1780
},
{
"epoch": 0.11,
"grad_norm": 0.9174728393554688,
"learning_rate": 9.821254215104808e-06,
"loss": 0.673,
"step": 1781
},
{
"epoch": 0.11,
"grad_norm": 0.9309795498847961,
"learning_rate": 9.820982234353795e-06,
"loss": 0.6023,
"step": 1782
},
{
"epoch": 0.11,
"grad_norm": 0.9999585747718811,
"learning_rate": 9.820710050607994e-06,
"loss": 0.6542,
"step": 1783
},
{
"epoch": 0.11,
"grad_norm": 0.8771758675575256,
"learning_rate": 9.820437663878862e-06,
"loss": 0.632,
"step": 1784
},
{
"epoch": 0.11,
"grad_norm": 0.9545724391937256,
"learning_rate": 9.820165074177867e-06,
"loss": 0.673,
"step": 1785
},
{
"epoch": 0.11,
"grad_norm": 0.9740934371948242,
"learning_rate": 9.819892281516491e-06,
"loss": 0.6621,
"step": 1786
},
{
"epoch": 0.11,
"grad_norm": 0.9247666597366333,
"learning_rate": 9.819619285906217e-06,
"loss": 0.609,
"step": 1787
},
{
"epoch": 0.11,
"grad_norm": 0.9412689208984375,
"learning_rate": 9.819346087358542e-06,
"loss": 0.689,
"step": 1788
},
{
"epoch": 0.11,
"grad_norm": 1.0129718780517578,
"learning_rate": 9.819072685884969e-06,
"loss": 0.6117,
"step": 1789
},
{
"epoch": 0.11,
"grad_norm": 0.9594516754150391,
"learning_rate": 9.818799081497008e-06,
"loss": 0.6672,
"step": 1790
},
{
"epoch": 0.11,
"grad_norm": 0.9422503709793091,
"learning_rate": 9.818525274206184e-06,
"loss": 0.6601,
"step": 1791
},
{
"epoch": 0.11,
"grad_norm": 0.9016688466072083,
"learning_rate": 9.818251264024018e-06,
"loss": 0.6811,
"step": 1792
},
{
"epoch": 0.11,
"grad_norm": 0.9061679244041443,
"learning_rate": 9.817977050962058e-06,
"loss": 0.6095,
"step": 1793
},
{
"epoch": 0.11,
"grad_norm": 0.973602831363678,
"learning_rate": 9.817702635031842e-06,
"loss": 0.6499,
"step": 1794
},
{
"epoch": 0.11,
"grad_norm": 0.9249684810638428,
"learning_rate": 9.817428016244928e-06,
"loss": 0.6369,
"step": 1795
},
{
"epoch": 0.11,
"grad_norm": 0.9385564923286438,
"learning_rate": 9.81715319461288e-06,
"loss": 0.7354,
"step": 1796
},
{
"epoch": 0.11,
"grad_norm": 0.9195820093154907,
"learning_rate": 9.816878170147268e-06,
"loss": 0.6723,
"step": 1797
},
{
"epoch": 0.11,
"grad_norm": 1.0029053688049316,
"learning_rate": 9.816602942859672e-06,
"loss": 0.6807,
"step": 1798
},
{
"epoch": 0.11,
"grad_norm": 1.0146007537841797,
"learning_rate": 9.816327512761683e-06,
"loss": 0.6377,
"step": 1799
},
{
"epoch": 0.11,
"grad_norm": 0.9209313988685608,
"learning_rate": 9.816051879864896e-06,
"loss": 0.5904,
"step": 1800
},
{
"epoch": 0.11,
"grad_norm": 0.8723121881484985,
"learning_rate": 9.81577604418092e-06,
"loss": 0.5807,
"step": 1801
},
{
"epoch": 0.11,
"grad_norm": 0.9405813217163086,
"learning_rate": 9.815500005721365e-06,
"loss": 0.6401,
"step": 1802
},
{
"epoch": 0.11,
"grad_norm": 1.0136600732803345,
"learning_rate": 9.815223764497859e-06,
"loss": 0.6395,
"step": 1803
},
{
"epoch": 0.11,
"grad_norm": 0.8953354358673096,
"learning_rate": 9.814947320522031e-06,
"loss": 0.6236,
"step": 1804
},
{
"epoch": 0.11,
"grad_norm": 0.9782286882400513,
"learning_rate": 9.81467067380552e-06,
"loss": 0.6592,
"step": 1805
},
{
"epoch": 0.11,
"grad_norm": 0.8998913168907166,
"learning_rate": 9.814393824359975e-06,
"loss": 0.6448,
"step": 1806
},
{
"epoch": 0.11,
"grad_norm": 0.8747649788856506,
"learning_rate": 9.814116772197058e-06,
"loss": 0.6038,
"step": 1807
},
{
"epoch": 0.11,
"grad_norm": 0.980236828327179,
"learning_rate": 9.813839517328428e-06,
"loss": 0.6272,
"step": 1808
},
{
"epoch": 0.11,
"grad_norm": 0.9255844354629517,
"learning_rate": 9.813562059765762e-06,
"loss": 0.6626,
"step": 1809
},
{
"epoch": 0.11,
"grad_norm": 0.9551252722740173,
"learning_rate": 9.813284399520744e-06,
"loss": 0.6511,
"step": 1810
},
{
"epoch": 0.11,
"grad_norm": 0.9699724912643433,
"learning_rate": 9.813006536605063e-06,
"loss": 0.6487,
"step": 1811
},
{
"epoch": 0.11,
"grad_norm": 0.9553450345993042,
"learning_rate": 9.812728471030421e-06,
"loss": 0.6733,
"step": 1812
},
{
"epoch": 0.11,
"grad_norm": 0.9273084402084351,
"learning_rate": 9.812450202808525e-06,
"loss": 0.6379,
"step": 1813
},
{
"epoch": 0.11,
"grad_norm": 0.9686819911003113,
"learning_rate": 9.812171731951092e-06,
"loss": 0.6156,
"step": 1814
},
{
"epoch": 0.11,
"grad_norm": 0.9068811535835266,
"learning_rate": 9.811893058469848e-06,
"loss": 0.6301,
"step": 1815
},
{
"epoch": 0.12,
"grad_norm": 0.8845064043998718,
"learning_rate": 9.811614182376527e-06,
"loss": 0.6841,
"step": 1816
},
{
"epoch": 0.12,
"grad_norm": 0.9105836153030396,
"learning_rate": 9.811335103682872e-06,
"loss": 0.6024,
"step": 1817
},
{
"epoch": 0.12,
"grad_norm": 0.9044576287269592,
"learning_rate": 9.81105582240063e-06,
"loss": 0.6668,
"step": 1818
},
{
"epoch": 0.12,
"grad_norm": 0.9328505992889404,
"learning_rate": 9.810776338541566e-06,
"loss": 0.6684,
"step": 1819
},
{
"epoch": 0.12,
"grad_norm": 0.9516772031784058,
"learning_rate": 9.810496652117445e-06,
"loss": 0.6531,
"step": 1820
},
{
"epoch": 0.12,
"grad_norm": 0.8713773488998413,
"learning_rate": 9.810216763140046e-06,
"loss": 0.586,
"step": 1821
},
{
"epoch": 0.12,
"grad_norm": 0.9502965807914734,
"learning_rate": 9.809936671621151e-06,
"loss": 0.5754,
"step": 1822
},
{
"epoch": 0.12,
"grad_norm": 0.9351384043693542,
"learning_rate": 9.809656377572556e-06,
"loss": 0.6786,
"step": 1823
},
{
"epoch": 0.12,
"grad_norm": 0.8560097217559814,
"learning_rate": 9.809375881006063e-06,
"loss": 0.5677,
"step": 1824
},
{
"epoch": 0.12,
"grad_norm": 0.8590288162231445,
"learning_rate": 9.809095181933482e-06,
"loss": 0.6032,
"step": 1825
},
{
"epoch": 0.12,
"grad_norm": 1.0070056915283203,
"learning_rate": 9.808814280366632e-06,
"loss": 0.6919,
"step": 1826
},
{
"epoch": 0.12,
"grad_norm": 0.9655309915542603,
"learning_rate": 9.808533176317341e-06,
"loss": 0.6631,
"step": 1827
},
{
"epoch": 0.12,
"grad_norm": 1.0063858032226562,
"learning_rate": 9.808251869797445e-06,
"loss": 0.6876,
"step": 1828
},
{
"epoch": 0.12,
"grad_norm": 0.9091975092887878,
"learning_rate": 9.807970360818791e-06,
"loss": 0.6122,
"step": 1829
},
{
"epoch": 0.12,
"grad_norm": 1.0076450109481812,
"learning_rate": 9.80768864939323e-06,
"loss": 0.6995,
"step": 1830
},
{
"epoch": 0.12,
"grad_norm": 0.8727695345878601,
"learning_rate": 9.807406735532625e-06,
"loss": 0.6056,
"step": 1831
},
{
"epoch": 0.12,
"grad_norm": 0.9693520069122314,
"learning_rate": 9.807124619248847e-06,
"loss": 0.6708,
"step": 1832
},
{
"epoch": 0.12,
"grad_norm": 0.993155300617218,
"learning_rate": 9.806842300553772e-06,
"loss": 0.6415,
"step": 1833
},
{
"epoch": 0.12,
"grad_norm": 0.9352355599403381,
"learning_rate": 9.806559779459291e-06,
"loss": 0.6858,
"step": 1834
},
{
"epoch": 0.12,
"grad_norm": 0.8845545649528503,
"learning_rate": 9.806277055977299e-06,
"loss": 0.6022,
"step": 1835
},
{
"epoch": 0.12,
"grad_norm": 0.9431570768356323,
"learning_rate": 9.8059941301197e-06,
"loss": 0.6488,
"step": 1836
},
{
"epoch": 0.12,
"grad_norm": 0.9353639483451843,
"learning_rate": 9.805711001898406e-06,
"loss": 0.6399,
"step": 1837
},
{
"epoch": 0.12,
"grad_norm": 0.9036180973052979,
"learning_rate": 9.805427671325339e-06,
"loss": 0.6234,
"step": 1838
},
{
"epoch": 0.12,
"grad_norm": 0.8938383460044861,
"learning_rate": 9.80514413841243e-06,
"loss": 0.5884,
"step": 1839
},
{
"epoch": 0.12,
"grad_norm": 1.001819372177124,
"learning_rate": 9.804860403171617e-06,
"loss": 0.6657,
"step": 1840
},
{
"epoch": 0.12,
"grad_norm": 0.9125610589981079,
"learning_rate": 9.804576465614848e-06,
"loss": 0.6196,
"step": 1841
},
{
"epoch": 0.12,
"grad_norm": 0.9416166543960571,
"learning_rate": 9.804292325754079e-06,
"loss": 0.6596,
"step": 1842
},
{
"epoch": 0.12,
"grad_norm": 0.9415349960327148,
"learning_rate": 9.804007983601271e-06,
"loss": 0.6558,
"step": 1843
},
{
"epoch": 0.12,
"grad_norm": 0.8949640393257141,
"learning_rate": 9.8037234391684e-06,
"loss": 0.6629,
"step": 1844
},
{
"epoch": 0.12,
"grad_norm": 0.9415730237960815,
"learning_rate": 9.803438692467446e-06,
"loss": 0.6158,
"step": 1845
},
{
"epoch": 0.12,
"grad_norm": 0.8983997106552124,
"learning_rate": 9.8031537435104e-06,
"loss": 0.6057,
"step": 1846
},
{
"epoch": 0.12,
"grad_norm": 0.8986216187477112,
"learning_rate": 9.802868592309255e-06,
"loss": 0.6404,
"step": 1847
},
{
"epoch": 0.12,
"grad_norm": 0.9952399134635925,
"learning_rate": 9.802583238876024e-06,
"loss": 0.6655,
"step": 1848
},
{
"epoch": 0.12,
"grad_norm": 0.908902108669281,
"learning_rate": 9.80229768322272e-06,
"loss": 0.6155,
"step": 1849
},
{
"epoch": 0.12,
"grad_norm": 0.9122000932693481,
"learning_rate": 9.802011925361366e-06,
"loss": 0.6594,
"step": 1850
},
{
"epoch": 0.12,
"grad_norm": 0.970879077911377,
"learning_rate": 9.801725965303995e-06,
"loss": 0.5872,
"step": 1851
},
{
"epoch": 0.12,
"grad_norm": 0.9796939492225647,
"learning_rate": 9.801439803062646e-06,
"loss": 0.6749,
"step": 1852
},
{
"epoch": 0.12,
"grad_norm": 0.8634384274482727,
"learning_rate": 9.801153438649371e-06,
"loss": 0.6442,
"step": 1853
},
{
"epoch": 0.12,
"grad_norm": 0.9319069981575012,
"learning_rate": 9.800866872076227e-06,
"loss": 0.6265,
"step": 1854
},
{
"epoch": 0.12,
"grad_norm": 0.8886886239051819,
"learning_rate": 9.80058010335528e-06,
"loss": 0.6443,
"step": 1855
},
{
"epoch": 0.12,
"grad_norm": 0.885466992855072,
"learning_rate": 9.800293132498603e-06,
"loss": 0.6565,
"step": 1856
},
{
"epoch": 0.12,
"grad_norm": 0.9097492694854736,
"learning_rate": 9.800005959518284e-06,
"loss": 0.6244,
"step": 1857
},
{
"epoch": 0.12,
"grad_norm": 0.9563896059989929,
"learning_rate": 9.79971858442641e-06,
"loss": 0.6734,
"step": 1858
},
{
"epoch": 0.12,
"grad_norm": 0.9626286625862122,
"learning_rate": 9.799431007235086e-06,
"loss": 0.6489,
"step": 1859
},
{
"epoch": 0.12,
"grad_norm": 0.9317120313644409,
"learning_rate": 9.799143227956416e-06,
"loss": 0.6892,
"step": 1860
},
{
"epoch": 0.12,
"grad_norm": 0.9715713262557983,
"learning_rate": 9.798855246602522e-06,
"loss": 0.6715,
"step": 1861
},
{
"epoch": 0.12,
"grad_norm": 0.9262539148330688,
"learning_rate": 9.798567063185525e-06,
"loss": 0.6057,
"step": 1862
},
{
"epoch": 0.12,
"grad_norm": 0.9007180333137512,
"learning_rate": 9.798278677717562e-06,
"loss": 0.6343,
"step": 1863
},
{
"epoch": 0.12,
"grad_norm": 0.9322105646133423,
"learning_rate": 9.797990090210777e-06,
"loss": 0.6516,
"step": 1864
},
{
"epoch": 0.12,
"grad_norm": 0.8944317102432251,
"learning_rate": 9.79770130067732e-06,
"loss": 0.6814,
"step": 1865
},
{
"epoch": 0.12,
"grad_norm": 0.8687607049942017,
"learning_rate": 9.797412309129351e-06,
"loss": 0.6282,
"step": 1866
},
{
"epoch": 0.12,
"grad_norm": 0.9158695340156555,
"learning_rate": 9.79712311557904e-06,
"loss": 0.6178,
"step": 1867
},
{
"epoch": 0.12,
"grad_norm": 0.9163758754730225,
"learning_rate": 9.79683372003856e-06,
"loss": 0.6176,
"step": 1868
},
{
"epoch": 0.12,
"grad_norm": 0.9717338681221008,
"learning_rate": 9.796544122520101e-06,
"loss": 0.6533,
"step": 1869
},
{
"epoch": 0.12,
"grad_norm": 0.880803108215332,
"learning_rate": 9.796254323035854e-06,
"loss": 0.5912,
"step": 1870
},
{
"epoch": 0.12,
"grad_norm": 0.8988786935806274,
"learning_rate": 9.795964321598023e-06,
"loss": 0.6374,
"step": 1871
},
{
"epoch": 0.12,
"grad_norm": 0.870625376701355,
"learning_rate": 9.795674118218819e-06,
"loss": 0.5529,
"step": 1872
},
{
"epoch": 0.12,
"grad_norm": 0.8748095631599426,
"learning_rate": 9.795383712910458e-06,
"loss": 0.6148,
"step": 1873
},
{
"epoch": 0.12,
"grad_norm": 0.962794303894043,
"learning_rate": 9.795093105685175e-06,
"loss": 0.658,
"step": 1874
},
{
"epoch": 0.12,
"grad_norm": 0.9512926340103149,
"learning_rate": 9.794802296555198e-06,
"loss": 0.6414,
"step": 1875
},
{
"epoch": 0.12,
"grad_norm": 0.8706688284873962,
"learning_rate": 9.79451128553278e-06,
"loss": 0.575,
"step": 1876
},
{
"epoch": 0.12,
"grad_norm": 0.9835572838783264,
"learning_rate": 9.794220072630168e-06,
"loss": 0.6807,
"step": 1877
},
{
"epoch": 0.12,
"grad_norm": 0.9953154921531677,
"learning_rate": 9.793928657859627e-06,
"loss": 0.6794,
"step": 1878
},
{
"epoch": 0.12,
"grad_norm": 0.9724439382553101,
"learning_rate": 9.793637041233428e-06,
"loss": 0.6771,
"step": 1879
},
{
"epoch": 0.12,
"grad_norm": 0.9492095708847046,
"learning_rate": 9.793345222763847e-06,
"loss": 0.6477,
"step": 1880
},
{
"epoch": 0.12,
"grad_norm": 0.8991506099700928,
"learning_rate": 9.793053202463176e-06,
"loss": 0.6047,
"step": 1881
},
{
"epoch": 0.12,
"grad_norm": 1.0061862468719482,
"learning_rate": 9.792760980343708e-06,
"loss": 0.6526,
"step": 1882
},
{
"epoch": 0.12,
"grad_norm": 0.9687420725822449,
"learning_rate": 9.792468556417746e-06,
"loss": 0.593,
"step": 1883
},
{
"epoch": 0.12,
"grad_norm": 0.9339932203292847,
"learning_rate": 9.792175930697608e-06,
"loss": 0.683,
"step": 1884
},
{
"epoch": 0.12,
"grad_norm": 0.900341808795929,
"learning_rate": 9.79188310319561e-06,
"loss": 0.6468,
"step": 1885
},
{
"epoch": 0.12,
"grad_norm": 1.0057995319366455,
"learning_rate": 9.791590073924086e-06,
"loss": 0.7412,
"step": 1886
},
{
"epoch": 0.12,
"grad_norm": 0.9101889729499817,
"learning_rate": 9.79129684289537e-06,
"loss": 0.6269,
"step": 1887
},
{
"epoch": 0.12,
"grad_norm": 0.8851762413978577,
"learning_rate": 9.791003410121815e-06,
"loss": 0.6335,
"step": 1888
},
{
"epoch": 0.12,
"grad_norm": 0.9171597361564636,
"learning_rate": 9.79070977561577e-06,
"loss": 0.6272,
"step": 1889
},
{
"epoch": 0.12,
"grad_norm": 0.8824100494384766,
"learning_rate": 9.790415939389604e-06,
"loss": 0.6263,
"step": 1890
},
{
"epoch": 0.12,
"grad_norm": 0.8969504237174988,
"learning_rate": 9.790121901455687e-06,
"loss": 0.6059,
"step": 1891
},
{
"epoch": 0.12,
"grad_norm": 0.9351462125778198,
"learning_rate": 9.7898276618264e-06,
"loss": 0.5978,
"step": 1892
},
{
"epoch": 0.12,
"grad_norm": 0.8654520511627197,
"learning_rate": 9.789533220514132e-06,
"loss": 0.5934,
"step": 1893
},
{
"epoch": 0.12,
"grad_norm": 0.9118187427520752,
"learning_rate": 9.789238577531284e-06,
"loss": 0.6832,
"step": 1894
},
{
"epoch": 0.12,
"grad_norm": 0.9300076365470886,
"learning_rate": 9.788943732890258e-06,
"loss": 0.5968,
"step": 1895
},
{
"epoch": 0.12,
"grad_norm": 0.9657106995582581,
"learning_rate": 9.788648686603472e-06,
"loss": 0.6519,
"step": 1896
},
{
"epoch": 0.12,
"grad_norm": 0.9614534378051758,
"learning_rate": 9.788353438683346e-06,
"loss": 0.6579,
"step": 1897
},
{
"epoch": 0.12,
"grad_norm": 0.9790334701538086,
"learning_rate": 9.788057989142317e-06,
"loss": 0.6839,
"step": 1898
},
{
"epoch": 0.12,
"grad_norm": 0.8710220456123352,
"learning_rate": 9.787762337992821e-06,
"loss": 0.6316,
"step": 1899
},
{
"epoch": 0.12,
"grad_norm": 0.9904646873474121,
"learning_rate": 9.78746648524731e-06,
"loss": 0.6722,
"step": 1900
},
{
"epoch": 0.12,
"grad_norm": 0.858887255191803,
"learning_rate": 9.787170430918239e-06,
"loss": 0.6155,
"step": 1901
},
{
"epoch": 0.12,
"grad_norm": 0.9751510620117188,
"learning_rate": 9.786874175018073e-06,
"loss": 0.6484,
"step": 1902
},
{
"epoch": 0.12,
"grad_norm": 0.9207701086997986,
"learning_rate": 9.78657771755929e-06,
"loss": 0.6447,
"step": 1903
},
{
"epoch": 0.12,
"grad_norm": 0.8809829354286194,
"learning_rate": 9.786281058554369e-06,
"loss": 0.6035,
"step": 1904
},
{
"epoch": 0.12,
"grad_norm": 0.8563817143440247,
"learning_rate": 9.785984198015804e-06,
"loss": 0.5981,
"step": 1905
},
{
"epoch": 0.12,
"grad_norm": 0.9085856676101685,
"learning_rate": 9.785687135956092e-06,
"loss": 0.6318,
"step": 1906
},
{
"epoch": 0.12,
"grad_norm": 0.9032816290855408,
"learning_rate": 9.785389872387745e-06,
"loss": 0.5691,
"step": 1907
},
{
"epoch": 0.12,
"grad_norm": 0.9462535977363586,
"learning_rate": 9.785092407323276e-06,
"loss": 0.6807,
"step": 1908
},
{
"epoch": 0.12,
"grad_norm": 0.9293099641799927,
"learning_rate": 9.784794740775212e-06,
"loss": 0.663,
"step": 1909
},
{
"epoch": 0.12,
"grad_norm": 0.9306168556213379,
"learning_rate": 9.784496872756086e-06,
"loss": 0.6242,
"step": 1910
},
{
"epoch": 0.12,
"grad_norm": 0.9209849834442139,
"learning_rate": 9.784198803278442e-06,
"loss": 0.6387,
"step": 1911
},
{
"epoch": 0.12,
"grad_norm": 0.8757005929946899,
"learning_rate": 9.78390053235483e-06,
"loss": 0.6034,
"step": 1912
},
{
"epoch": 0.12,
"grad_norm": 0.9847443699836731,
"learning_rate": 9.783602059997808e-06,
"loss": 0.6675,
"step": 1913
},
{
"epoch": 0.12,
"grad_norm": 0.9457899928092957,
"learning_rate": 9.783303386219942e-06,
"loss": 0.6446,
"step": 1914
},
{
"epoch": 0.12,
"grad_norm": 0.9456826448440552,
"learning_rate": 9.783004511033814e-06,
"loss": 0.6877,
"step": 1915
},
{
"epoch": 0.12,
"grad_norm": 1.0723676681518555,
"learning_rate": 9.782705434452002e-06,
"loss": 0.6977,
"step": 1916
},
{
"epoch": 0.12,
"grad_norm": 0.9328003525733948,
"learning_rate": 9.782406156487104e-06,
"loss": 0.6618,
"step": 1917
},
{
"epoch": 0.12,
"grad_norm": 1.0295826196670532,
"learning_rate": 9.782106677151717e-06,
"loss": 0.674,
"step": 1918
},
{
"epoch": 0.12,
"grad_norm": 0.919144868850708,
"learning_rate": 9.781806996458456e-06,
"loss": 0.6598,
"step": 1919
},
{
"epoch": 0.12,
"grad_norm": 0.8945218324661255,
"learning_rate": 9.781507114419937e-06,
"loss": 0.6114,
"step": 1920
},
{
"epoch": 0.12,
"grad_norm": 0.9285868406295776,
"learning_rate": 9.781207031048785e-06,
"loss": 0.6274,
"step": 1921
},
{
"epoch": 0.12,
"grad_norm": 0.974398136138916,
"learning_rate": 9.78090674635764e-06,
"loss": 0.631,
"step": 1922
},
{
"epoch": 0.12,
"grad_norm": 0.8931386470794678,
"learning_rate": 9.780606260359141e-06,
"loss": 0.6597,
"step": 1923
},
{
"epoch": 0.12,
"grad_norm": 1.076026201248169,
"learning_rate": 9.780305573065945e-06,
"loss": 0.7182,
"step": 1924
},
{
"epoch": 0.12,
"grad_norm": 0.8796352744102478,
"learning_rate": 9.78000468449071e-06,
"loss": 0.6023,
"step": 1925
},
{
"epoch": 0.12,
"grad_norm": 0.9814146161079407,
"learning_rate": 9.779703594646106e-06,
"loss": 0.7202,
"step": 1926
},
{
"epoch": 0.12,
"grad_norm": 0.9146006107330322,
"learning_rate": 9.779402303544811e-06,
"loss": 0.6295,
"step": 1927
},
{
"epoch": 0.12,
"grad_norm": 0.9009500741958618,
"learning_rate": 9.77910081119951e-06,
"loss": 0.5973,
"step": 1928
},
{
"epoch": 0.12,
"grad_norm": 1.0086877346038818,
"learning_rate": 9.7787991176229e-06,
"loss": 0.6795,
"step": 1929
},
{
"epoch": 0.12,
"grad_norm": 0.8697808384895325,
"learning_rate": 9.778497222827685e-06,
"loss": 0.5967,
"step": 1930
},
{
"epoch": 0.12,
"grad_norm": 0.8777212500572205,
"learning_rate": 9.778195126826574e-06,
"loss": 0.6801,
"step": 1931
},
{
"epoch": 0.12,
"grad_norm": 0.9132078289985657,
"learning_rate": 9.777892829632288e-06,
"loss": 0.6482,
"step": 1932
},
{
"epoch": 0.12,
"grad_norm": 0.9318856000900269,
"learning_rate": 9.777590331257557e-06,
"loss": 0.7193,
"step": 1933
},
{
"epoch": 0.12,
"grad_norm": 0.9304954409599304,
"learning_rate": 9.777287631715117e-06,
"loss": 0.5924,
"step": 1934
},
{
"epoch": 0.12,
"grad_norm": 0.9359629154205322,
"learning_rate": 9.776984731017714e-06,
"loss": 0.7213,
"step": 1935
},
{
"epoch": 0.12,
"grad_norm": 0.8828993439674377,
"learning_rate": 9.7766816291781e-06,
"loss": 0.6008,
"step": 1936
},
{
"epoch": 0.12,
"grad_norm": 0.9245378375053406,
"learning_rate": 9.77637832620904e-06,
"loss": 0.6614,
"step": 1937
},
{
"epoch": 0.12,
"grad_norm": 0.9555390477180481,
"learning_rate": 9.776074822123306e-06,
"loss": 0.6417,
"step": 1938
},
{
"epoch": 0.12,
"grad_norm": 0.9190395474433899,
"learning_rate": 9.775771116933674e-06,
"loss": 0.6063,
"step": 1939
},
{
"epoch": 0.12,
"grad_norm": 0.9256815314292908,
"learning_rate": 9.775467210652936e-06,
"loss": 0.6503,
"step": 1940
},
{
"epoch": 0.12,
"grad_norm": 0.8923386335372925,
"learning_rate": 9.775163103293885e-06,
"loss": 0.6111,
"step": 1941
},
{
"epoch": 0.12,
"grad_norm": 0.9070592522621155,
"learning_rate": 9.774858794869328e-06,
"loss": 0.6668,
"step": 1942
},
{
"epoch": 0.12,
"grad_norm": 0.8983462452888489,
"learning_rate": 9.774554285392078e-06,
"loss": 0.6129,
"step": 1943
},
{
"epoch": 0.12,
"grad_norm": 0.8370616436004639,
"learning_rate": 9.774249574874957e-06,
"loss": 0.6213,
"step": 1944
},
{
"epoch": 0.12,
"grad_norm": 0.8787031769752502,
"learning_rate": 9.773944663330793e-06,
"loss": 0.6145,
"step": 1945
},
{
"epoch": 0.12,
"grad_norm": 0.9314898252487183,
"learning_rate": 9.773639550772428e-06,
"loss": 0.6159,
"step": 1946
},
{
"epoch": 0.12,
"grad_norm": 0.9421966671943665,
"learning_rate": 9.773334237212707e-06,
"loss": 0.6402,
"step": 1947
},
{
"epoch": 0.12,
"grad_norm": 0.9963151812553406,
"learning_rate": 9.773028722664486e-06,
"loss": 0.6342,
"step": 1948
},
{
"epoch": 0.12,
"grad_norm": 0.8582517504692078,
"learning_rate": 9.77272300714063e-06,
"loss": 0.6762,
"step": 1949
},
{
"epoch": 0.12,
"grad_norm": 0.905519425868988,
"learning_rate": 9.77241709065401e-06,
"loss": 0.6098,
"step": 1950
},
{
"epoch": 0.12,
"grad_norm": 0.9416316151618958,
"learning_rate": 9.772110973217512e-06,
"loss": 0.6413,
"step": 1951
},
{
"epoch": 0.12,
"grad_norm": 0.9484925270080566,
"learning_rate": 9.77180465484402e-06,
"loss": 0.6415,
"step": 1952
},
{
"epoch": 0.12,
"grad_norm": 0.8854299187660217,
"learning_rate": 9.771498135546433e-06,
"loss": 0.6387,
"step": 1953
},
{
"epoch": 0.12,
"grad_norm": 0.896232545375824,
"learning_rate": 9.77119141533766e-06,
"loss": 0.611,
"step": 1954
},
{
"epoch": 0.12,
"grad_norm": 0.9634320735931396,
"learning_rate": 9.770884494230614e-06,
"loss": 0.6216,
"step": 1955
},
{
"epoch": 0.12,
"grad_norm": 0.9145449995994568,
"learning_rate": 9.770577372238217e-06,
"loss": 0.5922,
"step": 1956
},
{
"epoch": 0.12,
"grad_norm": 0.9219470620155334,
"learning_rate": 9.770270049373403e-06,
"loss": 0.6517,
"step": 1957
},
{
"epoch": 0.12,
"grad_norm": 0.8602051734924316,
"learning_rate": 9.769962525649112e-06,
"loss": 0.57,
"step": 1958
},
{
"epoch": 0.12,
"grad_norm": 0.9885112643241882,
"learning_rate": 9.769654801078294e-06,
"loss": 0.6788,
"step": 1959
},
{
"epoch": 0.12,
"grad_norm": 0.8877094984054565,
"learning_rate": 9.769346875673903e-06,
"loss": 0.602,
"step": 1960
},
{
"epoch": 0.12,
"grad_norm": 0.9231418967247009,
"learning_rate": 9.769038749448907e-06,
"loss": 0.6285,
"step": 1961
},
{
"epoch": 0.12,
"grad_norm": 0.9032172560691833,
"learning_rate": 9.76873042241628e-06,
"loss": 0.6017,
"step": 1962
},
{
"epoch": 0.12,
"grad_norm": 0.9338173866271973,
"learning_rate": 9.768421894589003e-06,
"loss": 0.6577,
"step": 1963
},
{
"epoch": 0.12,
"grad_norm": 0.99520343542099,
"learning_rate": 9.76811316598007e-06,
"loss": 0.6539,
"step": 1964
},
{
"epoch": 0.12,
"grad_norm": 0.8935354351997375,
"learning_rate": 9.767804236602476e-06,
"loss": 0.6299,
"step": 1965
},
{
"epoch": 0.12,
"grad_norm": 0.8823718428611755,
"learning_rate": 9.767495106469233e-06,
"loss": 0.6356,
"step": 1966
},
{
"epoch": 0.12,
"grad_norm": 0.9498067498207092,
"learning_rate": 9.767185775593356e-06,
"loss": 0.6466,
"step": 1967
},
{
"epoch": 0.12,
"grad_norm": 0.9748334884643555,
"learning_rate": 9.76687624398787e-06,
"loss": 0.6011,
"step": 1968
},
{
"epoch": 0.12,
"grad_norm": 0.9265943765640259,
"learning_rate": 9.766566511665808e-06,
"loss": 0.6582,
"step": 1969
},
{
"epoch": 0.12,
"grad_norm": 0.8861657381057739,
"learning_rate": 9.766256578640212e-06,
"loss": 0.6416,
"step": 1970
},
{
"epoch": 0.12,
"grad_norm": 0.9129331707954407,
"learning_rate": 9.76594644492413e-06,
"loss": 0.6252,
"step": 1971
},
{
"epoch": 0.12,
"grad_norm": 0.9186064004898071,
"learning_rate": 9.765636110530626e-06,
"loss": 0.6536,
"step": 1972
},
{
"epoch": 0.12,
"grad_norm": 1.0016237497329712,
"learning_rate": 9.765325575472761e-06,
"loss": 0.6365,
"step": 1973
},
{
"epoch": 0.13,
"grad_norm": 0.9075548052787781,
"learning_rate": 9.765014839763616e-06,
"loss": 0.6497,
"step": 1974
},
{
"epoch": 0.13,
"grad_norm": 0.9337440729141235,
"learning_rate": 9.764703903416271e-06,
"loss": 0.6143,
"step": 1975
},
{
"epoch": 0.13,
"grad_norm": 0.9033395648002625,
"learning_rate": 9.76439276644382e-06,
"loss": 0.6251,
"step": 1976
},
{
"epoch": 0.13,
"grad_norm": 0.9360528588294983,
"learning_rate": 9.764081428859363e-06,
"loss": 0.6793,
"step": 1977
},
{
"epoch": 0.13,
"grad_norm": 0.9267653226852417,
"learning_rate": 9.763769890676011e-06,
"loss": 0.6137,
"step": 1978
},
{
"epoch": 0.13,
"grad_norm": 0.9287899136543274,
"learning_rate": 9.76345815190688e-06,
"loss": 0.6879,
"step": 1979
},
{
"epoch": 0.13,
"grad_norm": 0.9556732773780823,
"learning_rate": 9.763146212565097e-06,
"loss": 0.6544,
"step": 1980
},
{
"epoch": 0.13,
"grad_norm": 0.9532358646392822,
"learning_rate": 9.762834072663798e-06,
"loss": 0.629,
"step": 1981
},
{
"epoch": 0.13,
"grad_norm": 0.8710858821868896,
"learning_rate": 9.762521732216124e-06,
"loss": 0.6043,
"step": 1982
},
{
"epoch": 0.13,
"grad_norm": 0.9163749814033508,
"learning_rate": 9.762209191235227e-06,
"loss": 0.7024,
"step": 1983
},
{
"epoch": 0.13,
"grad_norm": 0.9370541572570801,
"learning_rate": 9.761896449734269e-06,
"loss": 0.6327,
"step": 1984
},
{
"epoch": 0.13,
"grad_norm": 0.9257699251174927,
"learning_rate": 9.761583507726416e-06,
"loss": 0.6479,
"step": 1985
},
{
"epoch": 0.13,
"grad_norm": 0.9155780673027039,
"learning_rate": 9.761270365224846e-06,
"loss": 0.6547,
"step": 1986
},
{
"epoch": 0.13,
"grad_norm": 0.8561526536941528,
"learning_rate": 9.760957022242746e-06,
"loss": 0.565,
"step": 1987
},
{
"epoch": 0.13,
"grad_norm": 1.0075304508209229,
"learning_rate": 9.760643478793305e-06,
"loss": 0.6894,
"step": 1988
},
{
"epoch": 0.13,
"grad_norm": 0.9076879620552063,
"learning_rate": 9.760329734889729e-06,
"loss": 0.6435,
"step": 1989
},
{
"epoch": 0.13,
"grad_norm": 0.9092305302619934,
"learning_rate": 9.760015790545227e-06,
"loss": 0.6086,
"step": 1990
},
{
"epoch": 0.13,
"grad_norm": 0.9335655570030212,
"learning_rate": 9.759701645773022e-06,
"loss": 0.6191,
"step": 1991
},
{
"epoch": 0.13,
"grad_norm": 0.9743184447288513,
"learning_rate": 9.759387300586336e-06,
"loss": 0.6072,
"step": 1992
},
{
"epoch": 0.13,
"grad_norm": 0.7978373169898987,
"learning_rate": 9.759072754998407e-06,
"loss": 0.558,
"step": 1993
},
{
"epoch": 0.13,
"grad_norm": 0.9291953444480896,
"learning_rate": 9.758758009022482e-06,
"loss": 0.6755,
"step": 1994
},
{
"epoch": 0.13,
"grad_norm": 0.8679872751235962,
"learning_rate": 9.758443062671809e-06,
"loss": 0.657,
"step": 1995
},
{
"epoch": 0.13,
"grad_norm": 0.9098535180091858,
"learning_rate": 9.758127915959655e-06,
"loss": 0.6372,
"step": 1996
},
{
"epoch": 0.13,
"grad_norm": 0.899311363697052,
"learning_rate": 9.757812568899285e-06,
"loss": 0.6732,
"step": 1997
},
{
"epoch": 0.13,
"grad_norm": 0.8860989809036255,
"learning_rate": 9.75749702150398e-06,
"loss": 0.6065,
"step": 1998
},
{
"epoch": 0.13,
"grad_norm": 0.9011684656143188,
"learning_rate": 9.757181273787024e-06,
"loss": 0.6352,
"step": 1999
},
{
"epoch": 0.13,
"grad_norm": 0.8687819242477417,
"learning_rate": 9.756865325761715e-06,
"loss": 0.6535,
"step": 2000
},
{
"epoch": 0.13,
"grad_norm": 0.9379962682723999,
"learning_rate": 9.756549177441354e-06,
"loss": 0.602,
"step": 2001
},
{
"epoch": 0.13,
"grad_norm": 0.9150758385658264,
"learning_rate": 9.756232828839256e-06,
"loss": 0.621,
"step": 2002
},
{
"epoch": 0.13,
"grad_norm": 0.9422044157981873,
"learning_rate": 9.755916279968738e-06,
"loss": 0.6499,
"step": 2003
},
{
"epoch": 0.13,
"grad_norm": 0.906806230545044,
"learning_rate": 9.75559953084313e-06,
"loss": 0.6025,
"step": 2004
},
{
"epoch": 0.13,
"grad_norm": 0.9322741627693176,
"learning_rate": 9.755282581475769e-06,
"loss": 0.6008,
"step": 2005
},
{
"epoch": 0.13,
"grad_norm": 0.8540508151054382,
"learning_rate": 9.75496543188e-06,
"loss": 0.6156,
"step": 2006
},
{
"epoch": 0.13,
"grad_norm": 0.8853635191917419,
"learning_rate": 9.754648082069181e-06,
"loss": 0.5934,
"step": 2007
},
{
"epoch": 0.13,
"grad_norm": 0.9560227990150452,
"learning_rate": 9.75433053205667e-06,
"loss": 0.6117,
"step": 2008
},
{
"epoch": 0.13,
"grad_norm": 0.934593915939331,
"learning_rate": 9.754012781855837e-06,
"loss": 0.6571,
"step": 2009
},
{
"epoch": 0.13,
"grad_norm": 0.9356120824813843,
"learning_rate": 9.753694831480067e-06,
"loss": 0.6051,
"step": 2010
},
{
"epoch": 0.13,
"grad_norm": 0.9165395498275757,
"learning_rate": 9.753376680942744e-06,
"loss": 0.6509,
"step": 2011
},
{
"epoch": 0.13,
"grad_norm": 0.907821536064148,
"learning_rate": 9.753058330257263e-06,
"loss": 0.6233,
"step": 2012
},
{
"epoch": 0.13,
"grad_norm": 0.924810528755188,
"learning_rate": 9.752739779437032e-06,
"loss": 0.6273,
"step": 2013
},
{
"epoch": 0.13,
"grad_norm": 0.9630839824676514,
"learning_rate": 9.752421028495461e-06,
"loss": 0.6812,
"step": 2014
},
{
"epoch": 0.13,
"grad_norm": 0.9571135640144348,
"learning_rate": 9.752102077445974e-06,
"loss": 0.6364,
"step": 2015
},
{
"epoch": 0.13,
"grad_norm": 0.9181431531906128,
"learning_rate": 9.751782926302e-06,
"loss": 0.6296,
"step": 2016
},
{
"epoch": 0.13,
"grad_norm": 0.9492517113685608,
"learning_rate": 9.751463575076977e-06,
"loss": 0.6416,
"step": 2017
},
{
"epoch": 0.13,
"grad_norm": 0.9193875789642334,
"learning_rate": 9.75114402378435e-06,
"loss": 0.6329,
"step": 2018
},
{
"epoch": 0.13,
"grad_norm": 1.0199710130691528,
"learning_rate": 9.75082427243758e-06,
"loss": 0.6989,
"step": 2019
},
{
"epoch": 0.13,
"grad_norm": 1.0100188255310059,
"learning_rate": 9.750504321050126e-06,
"loss": 0.6863,
"step": 2020
},
{
"epoch": 0.13,
"grad_norm": 0.957966685295105,
"learning_rate": 9.75018416963546e-06,
"loss": 0.6497,
"step": 2021
},
{
"epoch": 0.13,
"grad_norm": 0.9822169542312622,
"learning_rate": 9.749863818207061e-06,
"loss": 0.6548,
"step": 2022
},
{
"epoch": 0.13,
"grad_norm": 0.9881288409233093,
"learning_rate": 9.749543266778424e-06,
"loss": 0.6411,
"step": 2023
},
{
"epoch": 0.13,
"grad_norm": 0.9414603114128113,
"learning_rate": 9.749222515363041e-06,
"loss": 0.6502,
"step": 2024
},
{
"epoch": 0.13,
"grad_norm": 0.935268223285675,
"learning_rate": 9.748901563974418e-06,
"loss": 0.6292,
"step": 2025
},
{
"epoch": 0.13,
"grad_norm": 0.9612113833427429,
"learning_rate": 9.748580412626072e-06,
"loss": 0.6809,
"step": 2026
},
{
"epoch": 0.13,
"grad_norm": 0.9209766983985901,
"learning_rate": 9.748259061331524e-06,
"loss": 0.6361,
"step": 2027
},
{
"epoch": 0.13,
"grad_norm": 0.9090907573699951,
"learning_rate": 9.747937510104305e-06,
"loss": 0.6418,
"step": 2028
},
{
"epoch": 0.13,
"grad_norm": 1.0263921022415161,
"learning_rate": 9.74761575895795e-06,
"loss": 0.6523,
"step": 2029
},
{
"epoch": 0.13,
"grad_norm": 0.9746382236480713,
"learning_rate": 9.747293807906017e-06,
"loss": 0.6635,
"step": 2030
},
{
"epoch": 0.13,
"grad_norm": 0.8786625266075134,
"learning_rate": 9.746971656962053e-06,
"loss": 0.6328,
"step": 2031
},
{
"epoch": 0.13,
"grad_norm": 0.9323434233665466,
"learning_rate": 9.746649306139627e-06,
"loss": 0.6359,
"step": 2032
},
{
"epoch": 0.13,
"grad_norm": 0.9723234176635742,
"learning_rate": 9.74632675545231e-06,
"loss": 0.6284,
"step": 2033
},
{
"epoch": 0.13,
"grad_norm": 0.9053655862808228,
"learning_rate": 9.746004004913688e-06,
"loss": 0.6266,
"step": 2034
},
{
"epoch": 0.13,
"grad_norm": 0.835522472858429,
"learning_rate": 9.745681054537345e-06,
"loss": 0.5554,
"step": 2035
},
{
"epoch": 0.13,
"grad_norm": 0.9256971478462219,
"learning_rate": 9.745357904336882e-06,
"loss": 0.6404,
"step": 2036
},
{
"epoch": 0.13,
"grad_norm": 0.9099552035331726,
"learning_rate": 9.745034554325905e-06,
"loss": 0.6096,
"step": 2037
},
{
"epoch": 0.13,
"grad_norm": 0.9474760293960571,
"learning_rate": 9.74471100451803e-06,
"loss": 0.6383,
"step": 2038
},
{
"epoch": 0.13,
"grad_norm": 0.8943539261817932,
"learning_rate": 9.744387254926882e-06,
"loss": 0.6159,
"step": 2039
},
{
"epoch": 0.13,
"grad_norm": 0.9860721230506897,
"learning_rate": 9.74406330556609e-06,
"loss": 0.6816,
"step": 2040
},
{
"epoch": 0.13,
"grad_norm": 0.9628056883811951,
"learning_rate": 9.743739156449294e-06,
"loss": 0.6745,
"step": 2041
},
{
"epoch": 0.13,
"grad_norm": 0.8961864709854126,
"learning_rate": 9.743414807590145e-06,
"loss": 0.6283,
"step": 2042
},
{
"epoch": 0.13,
"grad_norm": 0.9150246381759644,
"learning_rate": 9.743090259002302e-06,
"loss": 0.6118,
"step": 2043
},
{
"epoch": 0.13,
"grad_norm": 0.9093335270881653,
"learning_rate": 9.742765510699425e-06,
"loss": 0.6072,
"step": 2044
},
{
"epoch": 0.13,
"grad_norm": 0.9687802195549011,
"learning_rate": 9.742440562695194e-06,
"loss": 0.6055,
"step": 2045
},
{
"epoch": 0.13,
"grad_norm": 0.9072078466415405,
"learning_rate": 9.742115415003288e-06,
"loss": 0.6052,
"step": 2046
},
{
"epoch": 0.13,
"grad_norm": 0.9311554431915283,
"learning_rate": 9.741790067637398e-06,
"loss": 0.7248,
"step": 2047
},
{
"epoch": 0.13,
"grad_norm": 0.9264607429504395,
"learning_rate": 9.741464520611223e-06,
"loss": 0.6605,
"step": 2048
},
{
"epoch": 0.13,
"grad_norm": 0.9030102491378784,
"learning_rate": 9.741138773938472e-06,
"loss": 0.605,
"step": 2049
},
{
"epoch": 0.13,
"grad_norm": 0.9147754907608032,
"learning_rate": 9.74081282763286e-06,
"loss": 0.6209,
"step": 2050
},
{
"epoch": 0.13,
"grad_norm": 0.9356055855751038,
"learning_rate": 9.740486681708114e-06,
"loss": 0.6877,
"step": 2051
},
{
"epoch": 0.13,
"grad_norm": 0.9533680081367493,
"learning_rate": 9.740160336177962e-06,
"loss": 0.677,
"step": 2052
},
{
"epoch": 0.13,
"grad_norm": 0.9570329189300537,
"learning_rate": 9.73983379105615e-06,
"loss": 0.6264,
"step": 2053
},
{
"epoch": 0.13,
"grad_norm": 0.8931095600128174,
"learning_rate": 9.739507046356424e-06,
"loss": 0.65,
"step": 2054
},
{
"epoch": 0.13,
"grad_norm": 0.9158161282539368,
"learning_rate": 9.739180102092544e-06,
"loss": 0.6391,
"step": 2055
},
{
"epoch": 0.13,
"grad_norm": 0.9462281465530396,
"learning_rate": 9.738852958278278e-06,
"loss": 0.6517,
"step": 2056
},
{
"epoch": 0.13,
"grad_norm": 0.9567736983299255,
"learning_rate": 9.738525614927399e-06,
"loss": 0.6521,
"step": 2057
},
{
"epoch": 0.13,
"grad_norm": 0.8848094344139099,
"learning_rate": 9.73819807205369e-06,
"loss": 0.5847,
"step": 2058
},
{
"epoch": 0.13,
"grad_norm": 0.9664223194122314,
"learning_rate": 9.737870329670942e-06,
"loss": 0.6546,
"step": 2059
},
{
"epoch": 0.13,
"grad_norm": 1.0148460865020752,
"learning_rate": 9.737542387792957e-06,
"loss": 0.6764,
"step": 2060
},
{
"epoch": 0.13,
"grad_norm": 0.9341017007827759,
"learning_rate": 9.737214246433544e-06,
"loss": 0.6472,
"step": 2061
},
{
"epoch": 0.13,
"grad_norm": 0.9515483975410461,
"learning_rate": 9.736885905606516e-06,
"loss": 0.6375,
"step": 2062
},
{
"epoch": 0.13,
"grad_norm": 0.914443850517273,
"learning_rate": 9.736557365325703e-06,
"loss": 0.6448,
"step": 2063
},
{
"epoch": 0.13,
"grad_norm": 0.8859198689460754,
"learning_rate": 9.736228625604938e-06,
"loss": 0.6072,
"step": 2064
},
{
"epoch": 0.13,
"grad_norm": 0.9599637985229492,
"learning_rate": 9.735899686458059e-06,
"loss": 0.6274,
"step": 2065
},
{
"epoch": 0.13,
"grad_norm": 0.890293538570404,
"learning_rate": 9.73557054789892e-06,
"loss": 0.6138,
"step": 2066
},
{
"epoch": 0.13,
"grad_norm": 0.9055455923080444,
"learning_rate": 9.73524120994138e-06,
"loss": 0.6443,
"step": 2067
},
{
"epoch": 0.13,
"grad_norm": 0.9043434262275696,
"learning_rate": 9.734911672599304e-06,
"loss": 0.6233,
"step": 2068
},
{
"epoch": 0.13,
"grad_norm": 0.9188245534896851,
"learning_rate": 9.73458193588657e-06,
"loss": 0.6832,
"step": 2069
},
{
"epoch": 0.13,
"grad_norm": 0.928259551525116,
"learning_rate": 9.734251999817061e-06,
"loss": 0.6329,
"step": 2070
},
{
"epoch": 0.13,
"grad_norm": 0.9406484365463257,
"learning_rate": 9.733921864404669e-06,
"loss": 0.5915,
"step": 2071
},
{
"epoch": 0.13,
"grad_norm": 1.0036033391952515,
"learning_rate": 9.733591529663295e-06,
"loss": 0.6564,
"step": 2072
},
{
"epoch": 0.13,
"grad_norm": 0.9120476245880127,
"learning_rate": 9.73326099560685e-06,
"loss": 0.5945,
"step": 2073
},
{
"epoch": 0.13,
"grad_norm": 0.8819807767868042,
"learning_rate": 9.732930262249249e-06,
"loss": 0.5899,
"step": 2074
},
{
"epoch": 0.13,
"grad_norm": 0.953350841999054,
"learning_rate": 9.73259932960442e-06,
"loss": 0.6316,
"step": 2075
},
{
"epoch": 0.13,
"grad_norm": 0.9032095074653625,
"learning_rate": 9.732268197686296e-06,
"loss": 0.6505,
"step": 2076
},
{
"epoch": 0.13,
"grad_norm": 0.9567223787307739,
"learning_rate": 9.731936866508822e-06,
"loss": 0.6194,
"step": 2077
},
{
"epoch": 0.13,
"grad_norm": 0.980812132358551,
"learning_rate": 9.731605336085947e-06,
"loss": 0.6625,
"step": 2078
},
{
"epoch": 0.13,
"grad_norm": 0.8719442486763,
"learning_rate": 9.73127360643163e-06,
"loss": 0.6017,
"step": 2079
},
{
"epoch": 0.13,
"grad_norm": 0.9464851021766663,
"learning_rate": 9.730941677559843e-06,
"loss": 0.6396,
"step": 2080
},
{
"epoch": 0.13,
"grad_norm": 0.9146105051040649,
"learning_rate": 9.730609549484558e-06,
"loss": 0.672,
"step": 2081
},
{
"epoch": 0.13,
"grad_norm": 0.8443781733512878,
"learning_rate": 9.730277222219762e-06,
"loss": 0.5547,
"step": 2082
},
{
"epoch": 0.13,
"grad_norm": 0.9289159774780273,
"learning_rate": 9.729944695779448e-06,
"loss": 0.6482,
"step": 2083
},
{
"epoch": 0.13,
"grad_norm": 0.9049432873725891,
"learning_rate": 9.729611970177615e-06,
"loss": 0.5846,
"step": 2084
},
{
"epoch": 0.13,
"grad_norm": 0.9440188407897949,
"learning_rate": 9.729279045428277e-06,
"loss": 0.6773,
"step": 2085
},
{
"epoch": 0.13,
"grad_norm": 0.8901217579841614,
"learning_rate": 9.72894592154545e-06,
"loss": 0.6139,
"step": 2086
},
{
"epoch": 0.13,
"grad_norm": 0.9457870125770569,
"learning_rate": 9.72861259854316e-06,
"loss": 0.5882,
"step": 2087
},
{
"epoch": 0.13,
"grad_norm": 0.8994535207748413,
"learning_rate": 9.728279076435446e-06,
"loss": 0.5914,
"step": 2088
},
{
"epoch": 0.13,
"grad_norm": 0.9447741508483887,
"learning_rate": 9.727945355236345e-06,
"loss": 0.5963,
"step": 2089
},
{
"epoch": 0.13,
"grad_norm": 0.9336423277854919,
"learning_rate": 9.727611434959914e-06,
"loss": 0.6375,
"step": 2090
},
{
"epoch": 0.13,
"grad_norm": 0.9569306969642639,
"learning_rate": 9.727277315620212e-06,
"loss": 0.6664,
"step": 2091
},
{
"epoch": 0.13,
"grad_norm": 0.8650494813919067,
"learning_rate": 9.726942997231308e-06,
"loss": 0.6182,
"step": 2092
},
{
"epoch": 0.13,
"grad_norm": 0.8966164588928223,
"learning_rate": 9.726608479807278e-06,
"loss": 0.6107,
"step": 2093
},
{
"epoch": 0.13,
"grad_norm": 0.9104940295219421,
"learning_rate": 9.726273763362206e-06,
"loss": 0.6679,
"step": 2094
},
{
"epoch": 0.13,
"grad_norm": 0.9482932686805725,
"learning_rate": 9.725938847910187e-06,
"loss": 0.65,
"step": 2095
},
{
"epoch": 0.13,
"grad_norm": 0.8956882357597351,
"learning_rate": 9.725603733465325e-06,
"loss": 0.6163,
"step": 2096
},
{
"epoch": 0.13,
"grad_norm": 0.9436829686164856,
"learning_rate": 9.725268420041728e-06,
"loss": 0.6822,
"step": 2097
},
{
"epoch": 0.13,
"grad_norm": 0.9164643883705139,
"learning_rate": 9.724932907653516e-06,
"loss": 0.6358,
"step": 2098
},
{
"epoch": 0.13,
"grad_norm": 0.8321818113327026,
"learning_rate": 9.724597196314817e-06,
"loss": 0.5925,
"step": 2099
},
{
"epoch": 0.13,
"grad_norm": 0.8644357919692993,
"learning_rate": 9.724261286039766e-06,
"loss": 0.6181,
"step": 2100
},
{
"epoch": 0.13,
"grad_norm": 0.9723076820373535,
"learning_rate": 9.723925176842506e-06,
"loss": 0.6353,
"step": 2101
},
{
"epoch": 0.13,
"grad_norm": 0.9631821513175964,
"learning_rate": 9.72358886873719e-06,
"loss": 0.6319,
"step": 2102
},
{
"epoch": 0.13,
"grad_norm": 0.8844379782676697,
"learning_rate": 9.723252361737977e-06,
"loss": 0.6011,
"step": 2103
},
{
"epoch": 0.13,
"grad_norm": 0.8965840935707092,
"learning_rate": 9.722915655859042e-06,
"loss": 0.6657,
"step": 2104
},
{
"epoch": 0.13,
"grad_norm": 0.878998875617981,
"learning_rate": 9.722578751114556e-06,
"loss": 0.609,
"step": 2105
},
{
"epoch": 0.13,
"grad_norm": 0.9274184107780457,
"learning_rate": 9.72224164751871e-06,
"loss": 0.6772,
"step": 2106
},
{
"epoch": 0.13,
"grad_norm": 0.9545007944107056,
"learning_rate": 9.721904345085692e-06,
"loss": 0.7068,
"step": 2107
},
{
"epoch": 0.13,
"grad_norm": 0.8995804786682129,
"learning_rate": 9.72156684382971e-06,
"loss": 0.594,
"step": 2108
},
{
"epoch": 0.13,
"grad_norm": 0.8433576822280884,
"learning_rate": 9.721229143764975e-06,
"loss": 0.5871,
"step": 2109
},
{
"epoch": 0.13,
"grad_norm": 0.8701801300048828,
"learning_rate": 9.720891244905701e-06,
"loss": 0.6579,
"step": 2110
},
{
"epoch": 0.13,
"grad_norm": 0.9737119078636169,
"learning_rate": 9.720553147266123e-06,
"loss": 0.6812,
"step": 2111
},
{
"epoch": 0.13,
"grad_norm": 0.9231463074684143,
"learning_rate": 9.720214850860473e-06,
"loss": 0.6731,
"step": 2112
},
{
"epoch": 0.13,
"grad_norm": 0.9718281030654907,
"learning_rate": 9.719876355702993e-06,
"loss": 0.6299,
"step": 2113
},
{
"epoch": 0.13,
"grad_norm": 0.9555742144584656,
"learning_rate": 9.719537661807942e-06,
"loss": 0.6401,
"step": 2114
},
{
"epoch": 0.13,
"grad_norm": 0.9553225636482239,
"learning_rate": 9.719198769189577e-06,
"loss": 0.5957,
"step": 2115
},
{
"epoch": 0.13,
"grad_norm": 0.9114801287651062,
"learning_rate": 9.718859677862169e-06,
"loss": 0.6514,
"step": 2116
},
{
"epoch": 0.13,
"grad_norm": 0.9563703536987305,
"learning_rate": 9.718520387839996e-06,
"loss": 0.6367,
"step": 2117
},
{
"epoch": 0.13,
"grad_norm": 0.9876435399055481,
"learning_rate": 9.718180899137344e-06,
"loss": 0.6547,
"step": 2118
},
{
"epoch": 0.13,
"grad_norm": 0.91056889295578,
"learning_rate": 9.717841211768505e-06,
"loss": 0.6011,
"step": 2119
},
{
"epoch": 0.13,
"grad_norm": 0.9514434337615967,
"learning_rate": 9.71750132574779e-06,
"loss": 0.6384,
"step": 2120
},
{
"epoch": 0.13,
"grad_norm": 0.97725510597229,
"learning_rate": 9.717161241089501e-06,
"loss": 0.6478,
"step": 2121
},
{
"epoch": 0.13,
"grad_norm": 0.8876969814300537,
"learning_rate": 9.716820957807963e-06,
"loss": 0.6272,
"step": 2122
},
{
"epoch": 0.13,
"grad_norm": 0.9173263311386108,
"learning_rate": 9.716480475917504e-06,
"loss": 0.6316,
"step": 2123
},
{
"epoch": 0.13,
"grad_norm": 0.898065984249115,
"learning_rate": 9.716139795432459e-06,
"loss": 0.6161,
"step": 2124
},
{
"epoch": 0.13,
"grad_norm": 0.9070072770118713,
"learning_rate": 9.715798916367174e-06,
"loss": 0.6324,
"step": 2125
},
{
"epoch": 0.13,
"grad_norm": 0.871792733669281,
"learning_rate": 9.715457838736e-06,
"loss": 0.6324,
"step": 2126
},
{
"epoch": 0.13,
"grad_norm": 0.8872711658477783,
"learning_rate": 9.715116562553302e-06,
"loss": 0.6484,
"step": 2127
},
{
"epoch": 0.13,
"grad_norm": 0.903907299041748,
"learning_rate": 9.714775087833446e-06,
"loss": 0.6243,
"step": 2128
},
{
"epoch": 0.13,
"grad_norm": 0.9561790227890015,
"learning_rate": 9.714433414590816e-06,
"loss": 0.6232,
"step": 2129
},
{
"epoch": 0.13,
"grad_norm": 0.9328345656394958,
"learning_rate": 9.714091542839792e-06,
"loss": 0.6487,
"step": 2130
},
{
"epoch": 0.14,
"grad_norm": 0.8860450387001038,
"learning_rate": 9.713749472594773e-06,
"loss": 0.6337,
"step": 2131
},
{
"epoch": 0.14,
"grad_norm": 0.9177609086036682,
"learning_rate": 9.713407203870163e-06,
"loss": 0.6369,
"step": 2132
},
{
"epoch": 0.14,
"grad_norm": 0.8619222640991211,
"learning_rate": 9.713064736680372e-06,
"loss": 0.5794,
"step": 2133
},
{
"epoch": 0.14,
"grad_norm": 0.9057930707931519,
"learning_rate": 9.71272207103982e-06,
"loss": 0.5988,
"step": 2134
},
{
"epoch": 0.14,
"grad_norm": 0.9182736873626709,
"learning_rate": 9.712379206962936e-06,
"loss": 0.7029,
"step": 2135
},
{
"epoch": 0.14,
"grad_norm": 0.8596693277359009,
"learning_rate": 9.712036144464157e-06,
"loss": 0.598,
"step": 2136
},
{
"epoch": 0.14,
"grad_norm": 0.9068416953086853,
"learning_rate": 9.711692883557928e-06,
"loss": 0.6706,
"step": 2137
},
{
"epoch": 0.14,
"grad_norm": 0.9520207643508911,
"learning_rate": 9.711349424258702e-06,
"loss": 0.6514,
"step": 2138
},
{
"epoch": 0.14,
"grad_norm": 0.8763338327407837,
"learning_rate": 9.711005766580942e-06,
"loss": 0.5938,
"step": 2139
},
{
"epoch": 0.14,
"grad_norm": 0.9876976013183594,
"learning_rate": 9.710661910539117e-06,
"loss": 0.6066,
"step": 2140
},
{
"epoch": 0.14,
"grad_norm": 0.9245547652244568,
"learning_rate": 9.710317856147707e-06,
"loss": 0.6224,
"step": 2141
},
{
"epoch": 0.14,
"grad_norm": 0.8814427852630615,
"learning_rate": 9.709973603421196e-06,
"loss": 0.5874,
"step": 2142
},
{
"epoch": 0.14,
"grad_norm": 0.8934566378593445,
"learning_rate": 9.709629152374084e-06,
"loss": 0.6272,
"step": 2143
},
{
"epoch": 0.14,
"grad_norm": 0.8956682682037354,
"learning_rate": 9.70928450302087e-06,
"loss": 0.6705,
"step": 2144
},
{
"epoch": 0.14,
"grad_norm": 0.8444738984107971,
"learning_rate": 9.708939655376069e-06,
"loss": 0.5762,
"step": 2145
},
{
"epoch": 0.14,
"grad_norm": 0.9432147145271301,
"learning_rate": 9.708594609454201e-06,
"loss": 0.6278,
"step": 2146
},
{
"epoch": 0.14,
"grad_norm": 0.872987687587738,
"learning_rate": 9.708249365269793e-06,
"loss": 0.6689,
"step": 2147
},
{
"epoch": 0.14,
"grad_norm": 0.9760878086090088,
"learning_rate": 9.707903922837382e-06,
"loss": 0.665,
"step": 2148
},
{
"epoch": 0.14,
"grad_norm": 1.0128514766693115,
"learning_rate": 9.707558282171517e-06,
"loss": 0.7365,
"step": 2149
},
{
"epoch": 0.14,
"grad_norm": 0.9022131562232971,
"learning_rate": 9.707212443286746e-06,
"loss": 0.6428,
"step": 2150
},
{
"epoch": 0.14,
"grad_norm": 0.9122216105461121,
"learning_rate": 9.706866406197637e-06,
"loss": 0.617,
"step": 2151
},
{
"epoch": 0.14,
"grad_norm": 0.9093108773231506,
"learning_rate": 9.706520170918756e-06,
"loss": 0.6826,
"step": 2152
},
{
"epoch": 0.14,
"grad_norm": 0.9202459454536438,
"learning_rate": 9.706173737464683e-06,
"loss": 0.5962,
"step": 2153
},
{
"epoch": 0.14,
"grad_norm": 0.9246529936790466,
"learning_rate": 9.705827105850008e-06,
"loss": 0.6299,
"step": 2154
},
{
"epoch": 0.14,
"grad_norm": 0.9624621868133545,
"learning_rate": 9.705480276089323e-06,
"loss": 0.5714,
"step": 2155
},
{
"epoch": 0.14,
"grad_norm": 0.8589086532592773,
"learning_rate": 9.705133248197232e-06,
"loss": 0.6083,
"step": 2156
},
{
"epoch": 0.14,
"grad_norm": 0.8764198422431946,
"learning_rate": 9.704786022188346e-06,
"loss": 0.6235,
"step": 2157
},
{
"epoch": 0.14,
"grad_norm": 0.9193335771560669,
"learning_rate": 9.704438598077291e-06,
"loss": 0.5962,
"step": 2158
},
{
"epoch": 0.14,
"grad_norm": 0.8846250176429749,
"learning_rate": 9.70409097587869e-06,
"loss": 0.6036,
"step": 2159
},
{
"epoch": 0.14,
"grad_norm": 0.8875699043273926,
"learning_rate": 9.703743155607182e-06,
"loss": 0.5966,
"step": 2160
},
{
"epoch": 0.14,
"grad_norm": 0.9193180799484253,
"learning_rate": 9.703395137277414e-06,
"loss": 0.6257,
"step": 2161
},
{
"epoch": 0.14,
"grad_norm": 0.9386597275733948,
"learning_rate": 9.703046920904038e-06,
"loss": 0.6208,
"step": 2162
},
{
"epoch": 0.14,
"grad_norm": 1.0000402927398682,
"learning_rate": 9.702698506501717e-06,
"loss": 0.6224,
"step": 2163
},
{
"epoch": 0.14,
"grad_norm": 0.9253415465354919,
"learning_rate": 9.702349894085122e-06,
"loss": 0.6126,
"step": 2164
},
{
"epoch": 0.14,
"grad_norm": 0.9393420219421387,
"learning_rate": 9.702001083668931e-06,
"loss": 0.6636,
"step": 2165
},
{
"epoch": 0.14,
"grad_norm": 0.9388704299926758,
"learning_rate": 9.701652075267832e-06,
"loss": 0.6852,
"step": 2166
},
{
"epoch": 0.14,
"grad_norm": 0.9847069382667542,
"learning_rate": 9.701302868896518e-06,
"loss": 0.6375,
"step": 2167
},
{
"epoch": 0.14,
"grad_norm": 0.8902013301849365,
"learning_rate": 9.700953464569698e-06,
"loss": 0.6506,
"step": 2168
},
{
"epoch": 0.14,
"grad_norm": 0.8558127880096436,
"learning_rate": 9.700603862302078e-06,
"loss": 0.6022,
"step": 2169
},
{
"epoch": 0.14,
"grad_norm": 0.9520554542541504,
"learning_rate": 9.700254062108383e-06,
"loss": 0.6399,
"step": 2170
},
{
"epoch": 0.14,
"grad_norm": 0.90887451171875,
"learning_rate": 9.69990406400334e-06,
"loss": 0.6224,
"step": 2171
},
{
"epoch": 0.14,
"grad_norm": 0.9194844365119934,
"learning_rate": 9.699553868001688e-06,
"loss": 0.6657,
"step": 2172
},
{
"epoch": 0.14,
"grad_norm": 0.9208309054374695,
"learning_rate": 9.699203474118168e-06,
"loss": 0.5959,
"step": 2173
},
{
"epoch": 0.14,
"grad_norm": 0.9791555404663086,
"learning_rate": 9.69885288236754e-06,
"loss": 0.6241,
"step": 2174
},
{
"epoch": 0.14,
"grad_norm": 0.94879150390625,
"learning_rate": 9.698502092764562e-06,
"loss": 0.6352,
"step": 2175
},
{
"epoch": 0.14,
"grad_norm": 0.919954240322113,
"learning_rate": 9.698151105324006e-06,
"loss": 0.5973,
"step": 2176
},
{
"epoch": 0.14,
"grad_norm": 0.863923192024231,
"learning_rate": 9.697799920060651e-06,
"loss": 0.6602,
"step": 2177
},
{
"epoch": 0.14,
"grad_norm": 0.861292839050293,
"learning_rate": 9.697448536989284e-06,
"loss": 0.6121,
"step": 2178
},
{
"epoch": 0.14,
"grad_norm": 0.8456393480300903,
"learning_rate": 9.697096956124699e-06,
"loss": 0.6424,
"step": 2179
},
{
"epoch": 0.14,
"grad_norm": 0.9356247186660767,
"learning_rate": 9.696745177481703e-06,
"loss": 0.5969,
"step": 2180
},
{
"epoch": 0.14,
"grad_norm": 0.8527323007583618,
"learning_rate": 9.696393201075105e-06,
"loss": 0.6084,
"step": 2181
},
{
"epoch": 0.14,
"grad_norm": 0.8870608806610107,
"learning_rate": 9.696041026919727e-06,
"loss": 0.5883,
"step": 2182
},
{
"epoch": 0.14,
"grad_norm": 0.9454874992370605,
"learning_rate": 9.695688655030397e-06,
"loss": 0.6827,
"step": 2183
},
{
"epoch": 0.14,
"grad_norm": 0.9070287942886353,
"learning_rate": 9.695336085421953e-06,
"loss": 0.6381,
"step": 2184
},
{
"epoch": 0.14,
"grad_norm": 0.8830955624580383,
"learning_rate": 9.694983318109242e-06,
"loss": 0.6082,
"step": 2185
},
{
"epoch": 0.14,
"grad_norm": 0.8843045830726624,
"learning_rate": 9.694630353107115e-06,
"loss": 0.6811,
"step": 2186
},
{
"epoch": 0.14,
"grad_norm": 0.9212061166763306,
"learning_rate": 9.694277190430437e-06,
"loss": 0.6432,
"step": 2187
},
{
"epoch": 0.14,
"grad_norm": 0.8803088068962097,
"learning_rate": 9.693923830094074e-06,
"loss": 0.6415,
"step": 2188
},
{
"epoch": 0.14,
"grad_norm": 0.9358056783676147,
"learning_rate": 9.693570272112908e-06,
"loss": 0.661,
"step": 2189
},
{
"epoch": 0.14,
"grad_norm": 0.9013800024986267,
"learning_rate": 9.693216516501827e-06,
"loss": 0.6218,
"step": 2190
},
{
"epoch": 0.14,
"grad_norm": 0.944242000579834,
"learning_rate": 9.692862563275725e-06,
"loss": 0.6864,
"step": 2191
},
{
"epoch": 0.14,
"grad_norm": 0.8731442093849182,
"learning_rate": 9.692508412449505e-06,
"loss": 0.6566,
"step": 2192
},
{
"epoch": 0.14,
"grad_norm": 0.9164943099021912,
"learning_rate": 9.692154064038079e-06,
"loss": 0.6176,
"step": 2193
},
{
"epoch": 0.14,
"grad_norm": 0.8906972408294678,
"learning_rate": 9.691799518056369e-06,
"loss": 0.6736,
"step": 2194
},
{
"epoch": 0.14,
"grad_norm": 0.927988588809967,
"learning_rate": 9.691444774519302e-06,
"loss": 0.6201,
"step": 2195
},
{
"epoch": 0.14,
"grad_norm": 0.9599518775939941,
"learning_rate": 9.691089833441818e-06,
"loss": 0.662,
"step": 2196
},
{
"epoch": 0.14,
"grad_norm": 0.842663049697876,
"learning_rate": 9.69073469483886e-06,
"loss": 0.6176,
"step": 2197
},
{
"epoch": 0.14,
"grad_norm": 0.9649078845977783,
"learning_rate": 9.690379358725379e-06,
"loss": 0.6787,
"step": 2198
},
{
"epoch": 0.14,
"grad_norm": 0.9011525511741638,
"learning_rate": 9.69002382511634e-06,
"loss": 0.6612,
"step": 2199
},
{
"epoch": 0.14,
"grad_norm": 0.8732843399047852,
"learning_rate": 9.689668094026716e-06,
"loss": 0.6005,
"step": 2200
},
{
"epoch": 0.14,
"grad_norm": 0.9425661563873291,
"learning_rate": 9.689312165471483e-06,
"loss": 0.5944,
"step": 2201
},
{
"epoch": 0.14,
"grad_norm": 0.8813802003860474,
"learning_rate": 9.688956039465626e-06,
"loss": 0.6291,
"step": 2202
},
{
"epoch": 0.14,
"grad_norm": 0.9538077712059021,
"learning_rate": 9.688599716024141e-06,
"loss": 0.6214,
"step": 2203
},
{
"epoch": 0.14,
"grad_norm": 0.8900435566902161,
"learning_rate": 9.688243195162033e-06,
"loss": 0.62,
"step": 2204
},
{
"epoch": 0.14,
"grad_norm": 0.8894834518432617,
"learning_rate": 9.687886476894314e-06,
"loss": 0.5676,
"step": 2205
},
{
"epoch": 0.14,
"grad_norm": 1.0278310775756836,
"learning_rate": 9.687529561236004e-06,
"loss": 0.6704,
"step": 2206
},
{
"epoch": 0.14,
"grad_norm": 0.9716306924819946,
"learning_rate": 9.687172448202129e-06,
"loss": 0.6479,
"step": 2207
},
{
"epoch": 0.14,
"grad_norm": 0.8720564246177673,
"learning_rate": 9.68681513780773e-06,
"loss": 0.6445,
"step": 2208
},
{
"epoch": 0.14,
"grad_norm": 0.9259105324745178,
"learning_rate": 9.686457630067848e-06,
"loss": 0.6582,
"step": 2209
},
{
"epoch": 0.14,
"grad_norm": 0.9476026892662048,
"learning_rate": 9.686099924997538e-06,
"loss": 0.6086,
"step": 2210
},
{
"epoch": 0.14,
"grad_norm": 0.8634487390518188,
"learning_rate": 9.685742022611864e-06,
"loss": 0.5746,
"step": 2211
},
{
"epoch": 0.14,
"grad_norm": 0.9387729167938232,
"learning_rate": 9.685383922925892e-06,
"loss": 0.6432,
"step": 2212
},
{
"epoch": 0.14,
"grad_norm": 0.897686779499054,
"learning_rate": 9.685025625954703e-06,
"loss": 0.6607,
"step": 2213
},
{
"epoch": 0.14,
"grad_norm": 0.9364752769470215,
"learning_rate": 9.684667131713381e-06,
"loss": 0.6227,
"step": 2214
},
{
"epoch": 0.14,
"grad_norm": 0.9455356597900391,
"learning_rate": 9.684308440217026e-06,
"loss": 0.6756,
"step": 2215
},
{
"epoch": 0.14,
"grad_norm": 0.9486604928970337,
"learning_rate": 9.683949551480736e-06,
"loss": 0.5791,
"step": 2216
},
{
"epoch": 0.14,
"grad_norm": 0.9534194469451904,
"learning_rate": 9.683590465519625e-06,
"loss": 0.6675,
"step": 2217
},
{
"epoch": 0.14,
"grad_norm": 0.9059990048408508,
"learning_rate": 9.683231182348813e-06,
"loss": 0.6342,
"step": 2218
},
{
"epoch": 0.14,
"grad_norm": 0.8662623167037964,
"learning_rate": 9.682871701983428e-06,
"loss": 0.6061,
"step": 2219
},
{
"epoch": 0.14,
"grad_norm": 1.0336980819702148,
"learning_rate": 9.682512024438607e-06,
"loss": 0.6481,
"step": 2220
},
{
"epoch": 0.14,
"grad_norm": 0.9470313787460327,
"learning_rate": 9.682152149729491e-06,
"loss": 0.679,
"step": 2221
},
{
"epoch": 0.14,
"grad_norm": 0.9740751385688782,
"learning_rate": 9.681792077871238e-06,
"loss": 0.6369,
"step": 2222
},
{
"epoch": 0.14,
"grad_norm": 0.9472583532333374,
"learning_rate": 9.681431808879007e-06,
"loss": 0.6351,
"step": 2223
},
{
"epoch": 0.14,
"grad_norm": 0.9514747262001038,
"learning_rate": 9.681071342767967e-06,
"loss": 0.6496,
"step": 2224
},
{
"epoch": 0.14,
"grad_norm": 0.9216861724853516,
"learning_rate": 9.6807106795533e-06,
"loss": 0.6447,
"step": 2225
},
{
"epoch": 0.14,
"grad_norm": 0.9888139367103577,
"learning_rate": 9.680349819250185e-06,
"loss": 0.6117,
"step": 2226
},
{
"epoch": 0.14,
"grad_norm": 0.9336743354797363,
"learning_rate": 9.679988761873824e-06,
"loss": 0.609,
"step": 2227
},
{
"epoch": 0.14,
"grad_norm": 0.8719781637191772,
"learning_rate": 9.679627507439416e-06,
"loss": 0.6476,
"step": 2228
},
{
"epoch": 0.14,
"grad_norm": 0.8691688179969788,
"learning_rate": 9.679266055962174e-06,
"loss": 0.6706,
"step": 2229
},
{
"epoch": 0.14,
"grad_norm": 0.8492668271064758,
"learning_rate": 9.678904407457314e-06,
"loss": 0.621,
"step": 2230
},
{
"epoch": 0.14,
"grad_norm": 0.9780930876731873,
"learning_rate": 9.678542561940067e-06,
"loss": 0.6739,
"step": 2231
},
{
"epoch": 0.14,
"grad_norm": 0.983424186706543,
"learning_rate": 9.678180519425669e-06,
"loss": 0.6331,
"step": 2232
},
{
"epoch": 0.14,
"grad_norm": 0.8755106329917908,
"learning_rate": 9.677818279929363e-06,
"loss": 0.5712,
"step": 2233
},
{
"epoch": 0.14,
"grad_norm": 0.8746523857116699,
"learning_rate": 9.677455843466402e-06,
"loss": 0.6365,
"step": 2234
},
{
"epoch": 0.14,
"grad_norm": 0.9087699055671692,
"learning_rate": 9.677093210052048e-06,
"loss": 0.5855,
"step": 2235
},
{
"epoch": 0.14,
"grad_norm": 0.8961308598518372,
"learning_rate": 9.676730379701567e-06,
"loss": 0.6563,
"step": 2236
},
{
"epoch": 0.14,
"grad_norm": 0.917649507522583,
"learning_rate": 9.676367352430242e-06,
"loss": 0.6651,
"step": 2237
},
{
"epoch": 0.14,
"grad_norm": 0.8809880018234253,
"learning_rate": 9.676004128253354e-06,
"loss": 0.6311,
"step": 2238
},
{
"epoch": 0.14,
"grad_norm": 0.946129322052002,
"learning_rate": 9.675640707186199e-06,
"loss": 0.6366,
"step": 2239
},
{
"epoch": 0.14,
"grad_norm": 0.920985221862793,
"learning_rate": 9.67527708924408e-06,
"loss": 0.5958,
"step": 2240
},
{
"epoch": 0.14,
"grad_norm": 0.8754940629005432,
"learning_rate": 9.674913274442305e-06,
"loss": 0.6765,
"step": 2241
},
{
"epoch": 0.14,
"grad_norm": 0.9010186791419983,
"learning_rate": 9.674549262796196e-06,
"loss": 0.6457,
"step": 2242
},
{
"epoch": 0.14,
"grad_norm": 0.8508507013320923,
"learning_rate": 9.674185054321079e-06,
"loss": 0.5684,
"step": 2243
},
{
"epoch": 0.14,
"grad_norm": 0.9368433952331543,
"learning_rate": 9.67382064903229e-06,
"loss": 0.6338,
"step": 2244
},
{
"epoch": 0.14,
"grad_norm": 1.0419481992721558,
"learning_rate": 9.67345604694517e-06,
"loss": 0.6102,
"step": 2245
},
{
"epoch": 0.14,
"grad_norm": 0.9164296984672546,
"learning_rate": 9.673091248075077e-06,
"loss": 0.6279,
"step": 2246
},
{
"epoch": 0.14,
"grad_norm": 0.9411850571632385,
"learning_rate": 9.672726252437368e-06,
"loss": 0.6252,
"step": 2247
},
{
"epoch": 0.14,
"grad_norm": 0.8734287023544312,
"learning_rate": 9.67236106004741e-06,
"loss": 0.6179,
"step": 2248
},
{
"epoch": 0.14,
"grad_norm": 0.8806835412979126,
"learning_rate": 9.671995670920582e-06,
"loss": 0.5995,
"step": 2249
},
{
"epoch": 0.14,
"grad_norm": 0.9245673418045044,
"learning_rate": 9.671630085072268e-06,
"loss": 0.6239,
"step": 2250
},
{
"epoch": 0.14,
"grad_norm": 0.941852331161499,
"learning_rate": 9.671264302517864e-06,
"loss": 0.656,
"step": 2251
},
{
"epoch": 0.14,
"grad_norm": 0.8769700527191162,
"learning_rate": 9.67089832327277e-06,
"loss": 0.6233,
"step": 2252
},
{
"epoch": 0.14,
"grad_norm": 0.9232833385467529,
"learning_rate": 9.670532147352399e-06,
"loss": 0.6819,
"step": 2253
},
{
"epoch": 0.14,
"grad_norm": 0.9096298813819885,
"learning_rate": 9.670165774772164e-06,
"loss": 0.6606,
"step": 2254
},
{
"epoch": 0.14,
"grad_norm": 0.8869082927703857,
"learning_rate": 9.669799205547494e-06,
"loss": 0.6617,
"step": 2255
},
{
"epoch": 0.14,
"grad_norm": 0.8901436924934387,
"learning_rate": 9.669432439693827e-06,
"loss": 0.6609,
"step": 2256
},
{
"epoch": 0.14,
"grad_norm": 0.9619342088699341,
"learning_rate": 9.669065477226602e-06,
"loss": 0.6758,
"step": 2257
},
{
"epoch": 0.14,
"grad_norm": 0.9121052026748657,
"learning_rate": 9.668698318161271e-06,
"loss": 0.6162,
"step": 2258
},
{
"epoch": 0.14,
"grad_norm": 0.9086534380912781,
"learning_rate": 9.668330962513297e-06,
"loss": 0.6098,
"step": 2259
},
{
"epoch": 0.14,
"grad_norm": 1.0334198474884033,
"learning_rate": 9.667963410298147e-06,
"loss": 0.6498,
"step": 2260
},
{
"epoch": 0.14,
"grad_norm": 0.942879319190979,
"learning_rate": 9.667595661531294e-06,
"loss": 0.6464,
"step": 2261
},
{
"epoch": 0.14,
"grad_norm": 0.8824305534362793,
"learning_rate": 9.667227716228228e-06,
"loss": 0.6543,
"step": 2262
},
{
"epoch": 0.14,
"grad_norm": 0.8903138041496277,
"learning_rate": 9.666859574404434e-06,
"loss": 0.6208,
"step": 2263
},
{
"epoch": 0.14,
"grad_norm": 0.9194402694702148,
"learning_rate": 9.666491236075423e-06,
"loss": 0.6187,
"step": 2264
},
{
"epoch": 0.14,
"grad_norm": 0.9723901152610779,
"learning_rate": 9.666122701256697e-06,
"loss": 0.6729,
"step": 2265
},
{
"epoch": 0.14,
"grad_norm": 0.9405593276023865,
"learning_rate": 9.665753969963779e-06,
"loss": 0.6383,
"step": 2266
},
{
"epoch": 0.14,
"grad_norm": 0.9103307127952576,
"learning_rate": 9.66538504221219e-06,
"loss": 0.6709,
"step": 2267
},
{
"epoch": 0.14,
"grad_norm": 0.8941056132316589,
"learning_rate": 9.665015918017467e-06,
"loss": 0.6022,
"step": 2268
},
{
"epoch": 0.14,
"grad_norm": 0.9082260727882385,
"learning_rate": 9.664646597395151e-06,
"loss": 0.6307,
"step": 2269
},
{
"epoch": 0.14,
"grad_norm": 0.9310553073883057,
"learning_rate": 9.664277080360796e-06,
"loss": 0.6667,
"step": 2270
},
{
"epoch": 0.14,
"grad_norm": 0.893653154373169,
"learning_rate": 9.663907366929958e-06,
"loss": 0.6233,
"step": 2271
},
{
"epoch": 0.14,
"grad_norm": 0.9378598928451538,
"learning_rate": 9.663537457118206e-06,
"loss": 0.6922,
"step": 2272
},
{
"epoch": 0.14,
"grad_norm": 1.0078368186950684,
"learning_rate": 9.663167350941114e-06,
"loss": 0.7198,
"step": 2273
},
{
"epoch": 0.14,
"grad_norm": 0.9074714183807373,
"learning_rate": 9.662797048414267e-06,
"loss": 0.6979,
"step": 2274
},
{
"epoch": 0.14,
"grad_norm": 0.9271409511566162,
"learning_rate": 9.662426549553257e-06,
"loss": 0.6478,
"step": 2275
},
{
"epoch": 0.14,
"grad_norm": 0.8915387392044067,
"learning_rate": 9.662055854373684e-06,
"loss": 0.6721,
"step": 2276
},
{
"epoch": 0.14,
"grad_norm": 0.8576652407646179,
"learning_rate": 9.661684962891158e-06,
"loss": 0.6245,
"step": 2277
},
{
"epoch": 0.14,
"grad_norm": 0.8375203013420105,
"learning_rate": 9.661313875121294e-06,
"loss": 0.5757,
"step": 2278
},
{
"epoch": 0.14,
"grad_norm": 0.9374811053276062,
"learning_rate": 9.66094259107972e-06,
"loss": 0.673,
"step": 2279
},
{
"epoch": 0.14,
"grad_norm": 0.9571980834007263,
"learning_rate": 9.660571110782066e-06,
"loss": 0.6396,
"step": 2280
},
{
"epoch": 0.14,
"grad_norm": 0.9160385131835938,
"learning_rate": 9.660199434243977e-06,
"loss": 0.6462,
"step": 2281
},
{
"epoch": 0.14,
"grad_norm": 0.8740729689598083,
"learning_rate": 9.6598275614811e-06,
"loss": 0.6305,
"step": 2282
},
{
"epoch": 0.14,
"grad_norm": 0.9242905974388123,
"learning_rate": 9.659455492509096e-06,
"loss": 0.6148,
"step": 2283
},
{
"epoch": 0.14,
"grad_norm": 0.9448089599609375,
"learning_rate": 9.659083227343628e-06,
"loss": 0.6224,
"step": 2284
},
{
"epoch": 0.14,
"grad_norm": 0.9231502413749695,
"learning_rate": 9.658710766000375e-06,
"loss": 0.6241,
"step": 2285
},
{
"epoch": 0.14,
"grad_norm": 0.9591917991638184,
"learning_rate": 9.658338108495018e-06,
"loss": 0.6087,
"step": 2286
},
{
"epoch": 0.14,
"grad_norm": 0.9254891276359558,
"learning_rate": 9.65796525484325e-06,
"loss": 0.6346,
"step": 2287
},
{
"epoch": 0.14,
"grad_norm": 0.9530578255653381,
"learning_rate": 9.657592205060766e-06,
"loss": 0.6213,
"step": 2288
},
{
"epoch": 0.15,
"grad_norm": 0.9184418320655823,
"learning_rate": 9.657218959163278e-06,
"loss": 0.5876,
"step": 2289
},
{
"epoch": 0.15,
"grad_norm": 0.9244976043701172,
"learning_rate": 9.656845517166502e-06,
"loss": 0.6017,
"step": 2290
},
{
"epoch": 0.15,
"grad_norm": 0.9175297617912292,
"learning_rate": 9.656471879086158e-06,
"loss": 0.6249,
"step": 2291
},
{
"epoch": 0.15,
"grad_norm": 0.931868314743042,
"learning_rate": 9.656098044937985e-06,
"loss": 0.6413,
"step": 2292
},
{
"epoch": 0.15,
"grad_norm": 0.9301477074623108,
"learning_rate": 9.65572401473772e-06,
"loss": 0.6678,
"step": 2293
},
{
"epoch": 0.15,
"grad_norm": 0.8930208086967468,
"learning_rate": 9.655349788501112e-06,
"loss": 0.6502,
"step": 2294
},
{
"epoch": 0.15,
"grad_norm": 0.9450199007987976,
"learning_rate": 9.654975366243919e-06,
"loss": 0.622,
"step": 2295
},
{
"epoch": 0.15,
"grad_norm": 0.8430439829826355,
"learning_rate": 9.654600747981908e-06,
"loss": 0.5621,
"step": 2296
},
{
"epoch": 0.15,
"grad_norm": 0.9476586580276489,
"learning_rate": 9.654225933730852e-06,
"loss": 0.6427,
"step": 2297
},
{
"epoch": 0.15,
"grad_norm": 0.8823800086975098,
"learning_rate": 9.653850923506532e-06,
"loss": 0.6457,
"step": 2298
},
{
"epoch": 0.15,
"grad_norm": 0.8883811235427856,
"learning_rate": 9.653475717324739e-06,
"loss": 0.6332,
"step": 2299
},
{
"epoch": 0.15,
"grad_norm": 0.8883042335510254,
"learning_rate": 9.65310031520127e-06,
"loss": 0.6744,
"step": 2300
},
{
"epoch": 0.15,
"grad_norm": 0.9382773041725159,
"learning_rate": 9.652724717151938e-06,
"loss": 0.6717,
"step": 2301
},
{
"epoch": 0.15,
"grad_norm": 0.9416858553886414,
"learning_rate": 9.652348923192551e-06,
"loss": 0.648,
"step": 2302
},
{
"epoch": 0.15,
"grad_norm": 0.8762007355690002,
"learning_rate": 9.651972933338935e-06,
"loss": 0.5897,
"step": 2303
},
{
"epoch": 0.15,
"grad_norm": 0.9719755053520203,
"learning_rate": 9.651596747606924e-06,
"loss": 0.6991,
"step": 2304
},
{
"epoch": 0.15,
"grad_norm": 0.9252588152885437,
"learning_rate": 9.651220366012354e-06,
"loss": 0.6186,
"step": 2305
},
{
"epoch": 0.15,
"grad_norm": 0.9560814499855042,
"learning_rate": 9.650843788571076e-06,
"loss": 0.6411,
"step": 2306
},
{
"epoch": 0.15,
"grad_norm": 1.036543369293213,
"learning_rate": 9.650467015298943e-06,
"loss": 0.6339,
"step": 2307
},
{
"epoch": 0.15,
"grad_norm": 0.9324323534965515,
"learning_rate": 9.650090046211822e-06,
"loss": 0.6649,
"step": 2308
},
{
"epoch": 0.15,
"grad_norm": 0.8707371950149536,
"learning_rate": 9.649712881325587e-06,
"loss": 0.5718,
"step": 2309
},
{
"epoch": 0.15,
"grad_norm": 0.9522401690483093,
"learning_rate": 9.649335520656118e-06,
"loss": 0.6915,
"step": 2310
},
{
"epoch": 0.15,
"grad_norm": 0.9509444236755371,
"learning_rate": 9.648957964219303e-06,
"loss": 0.6725,
"step": 2311
},
{
"epoch": 0.15,
"grad_norm": 0.9052115678787231,
"learning_rate": 9.64858021203104e-06,
"loss": 0.6543,
"step": 2312
},
{
"epoch": 0.15,
"grad_norm": 0.914665162563324,
"learning_rate": 9.648202264107239e-06,
"loss": 0.6265,
"step": 2313
},
{
"epoch": 0.15,
"grad_norm": 0.895332396030426,
"learning_rate": 9.647824120463806e-06,
"loss": 0.6248,
"step": 2314
},
{
"epoch": 0.15,
"grad_norm": 0.9358121752738953,
"learning_rate": 9.64744578111667e-06,
"loss": 0.5782,
"step": 2315
},
{
"epoch": 0.15,
"grad_norm": 0.9630364179611206,
"learning_rate": 9.647067246081761e-06,
"loss": 0.6326,
"step": 2316
},
{
"epoch": 0.15,
"grad_norm": 0.9551122784614563,
"learning_rate": 9.646688515375014e-06,
"loss": 0.6224,
"step": 2317
},
{
"epoch": 0.15,
"grad_norm": 0.9448221325874329,
"learning_rate": 9.646309589012379e-06,
"loss": 0.6124,
"step": 2318
},
{
"epoch": 0.15,
"grad_norm": 0.8628481030464172,
"learning_rate": 9.64593046700981e-06,
"loss": 0.5868,
"step": 2319
},
{
"epoch": 0.15,
"grad_norm": 0.9186686873435974,
"learning_rate": 9.645551149383272e-06,
"loss": 0.6142,
"step": 2320
},
{
"epoch": 0.15,
"grad_norm": 0.8454536199569702,
"learning_rate": 9.645171636148736e-06,
"loss": 0.5603,
"step": 2321
},
{
"epoch": 0.15,
"grad_norm": 0.904983639717102,
"learning_rate": 9.644791927322182e-06,
"loss": 0.6052,
"step": 2322
},
{
"epoch": 0.15,
"grad_norm": 0.9742248058319092,
"learning_rate": 9.644412022919597e-06,
"loss": 0.5941,
"step": 2323
},
{
"epoch": 0.15,
"grad_norm": 0.8749731183052063,
"learning_rate": 9.644031922956979e-06,
"loss": 0.625,
"step": 2324
},
{
"epoch": 0.15,
"grad_norm": 0.9458450078964233,
"learning_rate": 9.64365162745033e-06,
"loss": 0.6475,
"step": 2325
},
{
"epoch": 0.15,
"grad_norm": 0.8835443258285522,
"learning_rate": 9.643271136415668e-06,
"loss": 0.6253,
"step": 2326
},
{
"epoch": 0.15,
"grad_norm": 0.9501144886016846,
"learning_rate": 9.642890449869008e-06,
"loss": 0.6576,
"step": 2327
},
{
"epoch": 0.15,
"grad_norm": 0.8323443531990051,
"learning_rate": 9.642509567826386e-06,
"loss": 0.6001,
"step": 2328
},
{
"epoch": 0.15,
"grad_norm": 0.8868235945701599,
"learning_rate": 9.642128490303834e-06,
"loss": 0.5987,
"step": 2329
},
{
"epoch": 0.15,
"grad_norm": 0.8260801434516907,
"learning_rate": 9.6417472173174e-06,
"loss": 0.6312,
"step": 2330
},
{
"epoch": 0.15,
"grad_norm": 0.8221123218536377,
"learning_rate": 9.64136574888314e-06,
"loss": 0.6371,
"step": 2331
},
{
"epoch": 0.15,
"grad_norm": 0.911744236946106,
"learning_rate": 9.640984085017113e-06,
"loss": 0.6679,
"step": 2332
},
{
"epoch": 0.15,
"grad_norm": 0.8895740509033203,
"learning_rate": 9.640602225735391e-06,
"loss": 0.6627,
"step": 2333
},
{
"epoch": 0.15,
"grad_norm": 0.8667907118797302,
"learning_rate": 9.640220171054054e-06,
"loss": 0.6181,
"step": 2334
},
{
"epoch": 0.15,
"grad_norm": 0.9176861643791199,
"learning_rate": 9.639837920989188e-06,
"loss": 0.6174,
"step": 2335
},
{
"epoch": 0.15,
"grad_norm": 1.0207765102386475,
"learning_rate": 9.639455475556887e-06,
"loss": 0.6571,
"step": 2336
},
{
"epoch": 0.15,
"grad_norm": 0.9681141972541809,
"learning_rate": 9.639072834773254e-06,
"loss": 0.6719,
"step": 2337
},
{
"epoch": 0.15,
"grad_norm": 0.8755819797515869,
"learning_rate": 9.638689998654404e-06,
"loss": 0.5911,
"step": 2338
},
{
"epoch": 0.15,
"grad_norm": 0.9221803545951843,
"learning_rate": 9.638306967216453e-06,
"loss": 0.6486,
"step": 2339
},
{
"epoch": 0.15,
"grad_norm": 0.8622904419898987,
"learning_rate": 9.637923740475534e-06,
"loss": 0.5772,
"step": 2340
},
{
"epoch": 0.15,
"grad_norm": 0.888806164264679,
"learning_rate": 9.637540318447778e-06,
"loss": 0.6504,
"step": 2341
},
{
"epoch": 0.15,
"grad_norm": 0.8896088004112244,
"learning_rate": 9.637156701149333e-06,
"loss": 0.6623,
"step": 2342
},
{
"epoch": 0.15,
"grad_norm": 0.9848870635032654,
"learning_rate": 9.636772888596352e-06,
"loss": 0.6652,
"step": 2343
},
{
"epoch": 0.15,
"grad_norm": 0.814385712146759,
"learning_rate": 9.636388880804991e-06,
"loss": 0.6293,
"step": 2344
},
{
"epoch": 0.15,
"grad_norm": 0.9577558040618896,
"learning_rate": 9.636004677791427e-06,
"loss": 0.6725,
"step": 2345
},
{
"epoch": 0.15,
"grad_norm": 0.9663403630256653,
"learning_rate": 9.635620279571833e-06,
"loss": 0.6702,
"step": 2346
},
{
"epoch": 0.15,
"grad_norm": 0.9322980642318726,
"learning_rate": 9.635235686162395e-06,
"loss": 0.6654,
"step": 2347
},
{
"epoch": 0.15,
"grad_norm": 0.8965892791748047,
"learning_rate": 9.634850897579304e-06,
"loss": 0.6208,
"step": 2348
},
{
"epoch": 0.15,
"grad_norm": 0.858284592628479,
"learning_rate": 9.63446591383877e-06,
"loss": 0.6063,
"step": 2349
},
{
"epoch": 0.15,
"grad_norm": 0.932563304901123,
"learning_rate": 9.634080734956993e-06,
"loss": 0.6188,
"step": 2350
},
{
"epoch": 0.15,
"grad_norm": 0.9433985948562622,
"learning_rate": 9.633695360950202e-06,
"loss": 0.6515,
"step": 2351
},
{
"epoch": 0.15,
"grad_norm": 0.9088814854621887,
"learning_rate": 9.633309791834617e-06,
"loss": 0.5985,
"step": 2352
},
{
"epoch": 0.15,
"grad_norm": 0.9924407601356506,
"learning_rate": 9.632924027626474e-06,
"loss": 0.6527,
"step": 2353
},
{
"epoch": 0.15,
"grad_norm": 0.989184558391571,
"learning_rate": 9.632538068342018e-06,
"loss": 0.6183,
"step": 2354
},
{
"epoch": 0.15,
"grad_norm": 0.898025631904602,
"learning_rate": 9.632151913997498e-06,
"loss": 0.6068,
"step": 2355
},
{
"epoch": 0.15,
"grad_norm": 0.8926374912261963,
"learning_rate": 9.631765564609177e-06,
"loss": 0.588,
"step": 2356
},
{
"epoch": 0.15,
"grad_norm": 0.9426562190055847,
"learning_rate": 9.63137902019332e-06,
"loss": 0.6104,
"step": 2357
},
{
"epoch": 0.15,
"grad_norm": 0.9089484810829163,
"learning_rate": 9.630992280766202e-06,
"loss": 0.5981,
"step": 2358
},
{
"epoch": 0.15,
"grad_norm": 0.9309037923812866,
"learning_rate": 9.630605346344113e-06,
"loss": 0.6064,
"step": 2359
},
{
"epoch": 0.15,
"grad_norm": 0.9744449257850647,
"learning_rate": 9.630218216943338e-06,
"loss": 0.6856,
"step": 2360
},
{
"epoch": 0.15,
"grad_norm": 0.9766737222671509,
"learning_rate": 9.629830892580183e-06,
"loss": 0.669,
"step": 2361
},
{
"epoch": 0.15,
"grad_norm": 0.9922558665275574,
"learning_rate": 9.629443373270954e-06,
"loss": 0.631,
"step": 2362
},
{
"epoch": 0.15,
"grad_norm": 0.960340678691864,
"learning_rate": 9.62905565903197e-06,
"loss": 0.653,
"step": 2363
},
{
"epoch": 0.15,
"grad_norm": 0.9167748689651489,
"learning_rate": 9.628667749879555e-06,
"loss": 0.6177,
"step": 2364
},
{
"epoch": 0.15,
"grad_norm": 0.8925089836120605,
"learning_rate": 9.628279645830044e-06,
"loss": 0.6215,
"step": 2365
},
{
"epoch": 0.15,
"grad_norm": 0.9379563331604004,
"learning_rate": 9.627891346899775e-06,
"loss": 0.5828,
"step": 2366
},
{
"epoch": 0.15,
"grad_norm": 0.8987218141555786,
"learning_rate": 9.627502853105104e-06,
"loss": 0.6567,
"step": 2367
},
{
"epoch": 0.15,
"grad_norm": 0.8803840279579163,
"learning_rate": 9.627114164462385e-06,
"loss": 0.6219,
"step": 2368
},
{
"epoch": 0.15,
"grad_norm": 0.9460154175758362,
"learning_rate": 9.626725280987985e-06,
"loss": 0.6922,
"step": 2369
},
{
"epoch": 0.15,
"grad_norm": 0.8633837103843689,
"learning_rate": 9.626336202698277e-06,
"loss": 0.6041,
"step": 2370
},
{
"epoch": 0.15,
"grad_norm": 0.9062354564666748,
"learning_rate": 9.625946929609647e-06,
"loss": 0.6013,
"step": 2371
},
{
"epoch": 0.15,
"grad_norm": 1.0080102682113647,
"learning_rate": 9.625557461738484e-06,
"loss": 0.6919,
"step": 2372
},
{
"epoch": 0.15,
"grad_norm": 0.9922934174537659,
"learning_rate": 9.625167799101188e-06,
"loss": 0.6966,
"step": 2373
},
{
"epoch": 0.15,
"grad_norm": 0.9306240081787109,
"learning_rate": 9.624777941714165e-06,
"loss": 0.7226,
"step": 2374
},
{
"epoch": 0.15,
"grad_norm": 0.9547491073608398,
"learning_rate": 9.624387889593832e-06,
"loss": 0.6127,
"step": 2375
},
{
"epoch": 0.15,
"grad_norm": 0.9361152052879333,
"learning_rate": 9.62399764275661e-06,
"loss": 0.6275,
"step": 2376
},
{
"epoch": 0.15,
"grad_norm": 0.9301709532737732,
"learning_rate": 9.623607201218934e-06,
"loss": 0.6553,
"step": 2377
},
{
"epoch": 0.15,
"grad_norm": 0.9561883807182312,
"learning_rate": 9.623216564997244e-06,
"loss": 0.6708,
"step": 2378
},
{
"epoch": 0.15,
"grad_norm": 0.8827099800109863,
"learning_rate": 9.622825734107987e-06,
"loss": 0.6176,
"step": 2379
},
{
"epoch": 0.15,
"grad_norm": 0.9545076489448547,
"learning_rate": 9.62243470856762e-06,
"loss": 0.6568,
"step": 2380
},
{
"epoch": 0.15,
"grad_norm": 0.947793185710907,
"learning_rate": 9.622043488392607e-06,
"loss": 0.6247,
"step": 2381
},
{
"epoch": 0.15,
"grad_norm": 0.8860893249511719,
"learning_rate": 9.621652073599423e-06,
"loss": 0.6495,
"step": 2382
},
{
"epoch": 0.15,
"grad_norm": 0.852778434753418,
"learning_rate": 9.621260464204548e-06,
"loss": 0.6111,
"step": 2383
},
{
"epoch": 0.15,
"grad_norm": 0.8790839910507202,
"learning_rate": 9.620868660224468e-06,
"loss": 0.6269,
"step": 2384
},
{
"epoch": 0.15,
"grad_norm": 0.9253284931182861,
"learning_rate": 9.620476661675685e-06,
"loss": 0.6211,
"step": 2385
},
{
"epoch": 0.15,
"grad_norm": 0.892335832118988,
"learning_rate": 9.620084468574704e-06,
"loss": 0.6312,
"step": 2386
},
{
"epoch": 0.15,
"grad_norm": 0.9835995435714722,
"learning_rate": 9.619692080938039e-06,
"loss": 0.5984,
"step": 2387
},
{
"epoch": 0.15,
"grad_norm": 0.9870280027389526,
"learning_rate": 9.61929949878221e-06,
"loss": 0.6646,
"step": 2388
},
{
"epoch": 0.15,
"grad_norm": 1.0109413862228394,
"learning_rate": 9.618906722123748e-06,
"loss": 0.6489,
"step": 2389
},
{
"epoch": 0.15,
"grad_norm": 0.9506871700286865,
"learning_rate": 9.618513750979193e-06,
"loss": 0.649,
"step": 2390
},
{
"epoch": 0.15,
"grad_norm": 0.8704227209091187,
"learning_rate": 9.61812058536509e-06,
"loss": 0.5762,
"step": 2391
},
{
"epoch": 0.15,
"grad_norm": 0.9024654626846313,
"learning_rate": 9.617727225297994e-06,
"loss": 0.6464,
"step": 2392
},
{
"epoch": 0.15,
"grad_norm": 0.9265242218971252,
"learning_rate": 9.617333670794468e-06,
"loss": 0.627,
"step": 2393
},
{
"epoch": 0.15,
"grad_norm": 0.8859432935714722,
"learning_rate": 9.616939921871087e-06,
"loss": 0.6211,
"step": 2394
},
{
"epoch": 0.15,
"grad_norm": 0.9842885732650757,
"learning_rate": 9.616545978544424e-06,
"loss": 0.6308,
"step": 2395
},
{
"epoch": 0.15,
"grad_norm": 0.8890007138252258,
"learning_rate": 9.616151840831069e-06,
"loss": 0.5769,
"step": 2396
},
{
"epoch": 0.15,
"grad_norm": 0.9050889015197754,
"learning_rate": 9.61575750874762e-06,
"loss": 0.6224,
"step": 2397
},
{
"epoch": 0.15,
"grad_norm": 0.8961501717567444,
"learning_rate": 9.615362982310679e-06,
"loss": 0.5271,
"step": 2398
},
{
"epoch": 0.15,
"grad_norm": 0.8966047167778015,
"learning_rate": 9.614968261536858e-06,
"loss": 0.6134,
"step": 2399
},
{
"epoch": 0.15,
"grad_norm": 1.0056560039520264,
"learning_rate": 9.61457334644278e-06,
"loss": 0.6931,
"step": 2400
},
{
"epoch": 0.15,
"grad_norm": 0.9624162316322327,
"learning_rate": 9.61417823704507e-06,
"loss": 0.6242,
"step": 2401
},
{
"epoch": 0.15,
"grad_norm": 0.9640290141105652,
"learning_rate": 9.613782933360365e-06,
"loss": 0.6799,
"step": 2402
},
{
"epoch": 0.15,
"grad_norm": 0.9172433018684387,
"learning_rate": 9.613387435405312e-06,
"loss": 0.5416,
"step": 2403
},
{
"epoch": 0.15,
"grad_norm": 0.965398371219635,
"learning_rate": 9.612991743196562e-06,
"loss": 0.6174,
"step": 2404
},
{
"epoch": 0.15,
"grad_norm": 0.909716010093689,
"learning_rate": 9.612595856750776e-06,
"loss": 0.6275,
"step": 2405
},
{
"epoch": 0.15,
"grad_norm": 0.9636967182159424,
"learning_rate": 9.612199776084627e-06,
"loss": 0.6389,
"step": 2406
},
{
"epoch": 0.15,
"grad_norm": 0.8924964070320129,
"learning_rate": 9.611803501214789e-06,
"loss": 0.6796,
"step": 2407
},
{
"epoch": 0.15,
"grad_norm": 0.9327677488327026,
"learning_rate": 9.61140703215795e-06,
"loss": 0.612,
"step": 2408
},
{
"epoch": 0.15,
"grad_norm": 0.943336546421051,
"learning_rate": 9.611010368930801e-06,
"loss": 0.6227,
"step": 2409
},
{
"epoch": 0.15,
"grad_norm": 0.9563452005386353,
"learning_rate": 9.610613511550047e-06,
"loss": 0.6554,
"step": 2410
},
{
"epoch": 0.15,
"grad_norm": 0.9521295428276062,
"learning_rate": 9.610216460032398e-06,
"loss": 0.6661,
"step": 2411
},
{
"epoch": 0.15,
"grad_norm": 1.0174225568771362,
"learning_rate": 9.60981921439457e-06,
"loss": 0.716,
"step": 2412
},
{
"epoch": 0.15,
"grad_norm": 1.0105873346328735,
"learning_rate": 9.609421774653291e-06,
"loss": 0.6864,
"step": 2413
},
{
"epoch": 0.15,
"grad_norm": 0.9458989500999451,
"learning_rate": 9.609024140825299e-06,
"loss": 0.5787,
"step": 2414
},
{
"epoch": 0.15,
"grad_norm": 1.0147578716278076,
"learning_rate": 9.608626312927331e-06,
"loss": 0.6836,
"step": 2415
},
{
"epoch": 0.15,
"grad_norm": 0.9052198529243469,
"learning_rate": 9.608228290976143e-06,
"loss": 0.6575,
"step": 2416
},
{
"epoch": 0.15,
"grad_norm": 0.9532240629196167,
"learning_rate": 9.607830074988491e-06,
"loss": 0.7125,
"step": 2417
},
{
"epoch": 0.15,
"grad_norm": 0.9613702297210693,
"learning_rate": 9.607431664981144e-06,
"loss": 0.6158,
"step": 2418
},
{
"epoch": 0.15,
"grad_norm": 1.0011951923370361,
"learning_rate": 9.607033060970878e-06,
"loss": 0.6624,
"step": 2419
},
{
"epoch": 0.15,
"grad_norm": 1.0187532901763916,
"learning_rate": 9.606634262974477e-06,
"loss": 0.6558,
"step": 2420
},
{
"epoch": 0.15,
"grad_norm": 0.9332427382469177,
"learning_rate": 9.606235271008732e-06,
"loss": 0.5966,
"step": 2421
},
{
"epoch": 0.15,
"grad_norm": 1.0140283107757568,
"learning_rate": 9.605836085090445e-06,
"loss": 0.6317,
"step": 2422
},
{
"epoch": 0.15,
"grad_norm": 0.9521609544754028,
"learning_rate": 9.605436705236421e-06,
"loss": 0.624,
"step": 2423
},
{
"epoch": 0.15,
"grad_norm": 0.8743317127227783,
"learning_rate": 9.60503713146348e-06,
"loss": 0.6424,
"step": 2424
},
{
"epoch": 0.15,
"grad_norm": 0.9343128800392151,
"learning_rate": 9.604637363788444e-06,
"loss": 0.6336,
"step": 2425
},
{
"epoch": 0.15,
"grad_norm": 0.876990795135498,
"learning_rate": 9.604237402228149e-06,
"loss": 0.6946,
"step": 2426
},
{
"epoch": 0.15,
"grad_norm": 1.0633113384246826,
"learning_rate": 9.603837246799431e-06,
"loss": 0.6597,
"step": 2427
},
{
"epoch": 0.15,
"grad_norm": 0.9568866491317749,
"learning_rate": 9.603436897519145e-06,
"loss": 0.6364,
"step": 2428
},
{
"epoch": 0.15,
"grad_norm": 0.8877198100090027,
"learning_rate": 9.603036354404145e-06,
"loss": 0.6024,
"step": 2429
},
{
"epoch": 0.15,
"grad_norm": 0.844281792640686,
"learning_rate": 9.602635617471295e-06,
"loss": 0.6393,
"step": 2430
},
{
"epoch": 0.15,
"grad_norm": 0.8881232738494873,
"learning_rate": 9.602234686737473e-06,
"loss": 0.6738,
"step": 2431
},
{
"epoch": 0.15,
"grad_norm": 0.8689331412315369,
"learning_rate": 9.601833562219556e-06,
"loss": 0.6245,
"step": 2432
},
{
"epoch": 0.15,
"grad_norm": 0.8485287427902222,
"learning_rate": 9.601432243934437e-06,
"loss": 0.5738,
"step": 2433
},
{
"epoch": 0.15,
"grad_norm": 0.910656213760376,
"learning_rate": 9.601030731899014e-06,
"loss": 0.6129,
"step": 2434
},
{
"epoch": 0.15,
"grad_norm": 0.8227107524871826,
"learning_rate": 9.600629026130192e-06,
"loss": 0.5835,
"step": 2435
},
{
"epoch": 0.15,
"grad_norm": 0.948371410369873,
"learning_rate": 9.600227126644887e-06,
"loss": 0.7007,
"step": 2436
},
{
"epoch": 0.15,
"grad_norm": 0.8964093327522278,
"learning_rate": 9.59982503346002e-06,
"loss": 0.6071,
"step": 2437
},
{
"epoch": 0.15,
"grad_norm": 0.9090175628662109,
"learning_rate": 9.599422746592522e-06,
"loss": 0.6698,
"step": 2438
},
{
"epoch": 0.15,
"grad_norm": 1.0446149110794067,
"learning_rate": 9.599020266059334e-06,
"loss": 0.699,
"step": 2439
},
{
"epoch": 0.15,
"grad_norm": 0.8509514331817627,
"learning_rate": 9.5986175918774e-06,
"loss": 0.6503,
"step": 2440
},
{
"epoch": 0.15,
"grad_norm": 0.9461331367492676,
"learning_rate": 9.598214724063678e-06,
"loss": 0.6716,
"step": 2441
},
{
"epoch": 0.15,
"grad_norm": 0.8966230750083923,
"learning_rate": 9.597811662635128e-06,
"loss": 0.6537,
"step": 2442
},
{
"epoch": 0.15,
"grad_norm": 1.0068098306655884,
"learning_rate": 9.597408407608725e-06,
"loss": 0.6665,
"step": 2443
},
{
"epoch": 0.15,
"grad_norm": 0.9178805351257324,
"learning_rate": 9.597004959001447e-06,
"loss": 0.628,
"step": 2444
},
{
"epoch": 0.15,
"grad_norm": 0.9293497204780579,
"learning_rate": 9.596601316830282e-06,
"loss": 0.6272,
"step": 2445
},
{
"epoch": 0.15,
"grad_norm": 0.9563755989074707,
"learning_rate": 9.596197481112225e-06,
"loss": 0.6115,
"step": 2446
},
{
"epoch": 0.16,
"grad_norm": 0.8711754083633423,
"learning_rate": 9.59579345186428e-06,
"loss": 0.5987,
"step": 2447
},
{
"epoch": 0.16,
"grad_norm": 0.9303868412971497,
"learning_rate": 9.595389229103464e-06,
"loss": 0.6427,
"step": 2448
},
{
"epoch": 0.16,
"grad_norm": 0.8827221393585205,
"learning_rate": 9.594984812846792e-06,
"loss": 0.6017,
"step": 2449
},
{
"epoch": 0.16,
"grad_norm": 0.9278771877288818,
"learning_rate": 9.594580203111294e-06,
"loss": 0.5994,
"step": 2450
},
{
"epoch": 0.16,
"grad_norm": 0.9450991153717041,
"learning_rate": 9.594175399914008e-06,
"loss": 0.6128,
"step": 2451
},
{
"epoch": 0.16,
"grad_norm": 0.9174882173538208,
"learning_rate": 9.593770403271977e-06,
"loss": 0.661,
"step": 2452
},
{
"epoch": 0.16,
"grad_norm": 0.9412451982498169,
"learning_rate": 9.593365213202255e-06,
"loss": 0.6346,
"step": 2453
},
{
"epoch": 0.16,
"grad_norm": 0.8439229726791382,
"learning_rate": 9.592959829721903e-06,
"loss": 0.6032,
"step": 2454
},
{
"epoch": 0.16,
"grad_norm": 0.8956865072250366,
"learning_rate": 9.59255425284799e-06,
"loss": 0.6588,
"step": 2455
},
{
"epoch": 0.16,
"grad_norm": 0.8552918434143066,
"learning_rate": 9.592148482597595e-06,
"loss": 0.6176,
"step": 2456
},
{
"epoch": 0.16,
"grad_norm": 0.9776921272277832,
"learning_rate": 9.591742518987802e-06,
"loss": 0.6922,
"step": 2457
},
{
"epoch": 0.16,
"grad_norm": 0.8479081392288208,
"learning_rate": 9.591336362035703e-06,
"loss": 0.5635,
"step": 2458
},
{
"epoch": 0.16,
"grad_norm": 0.8601279854774475,
"learning_rate": 9.590930011758403e-06,
"loss": 0.6025,
"step": 2459
},
{
"epoch": 0.16,
"grad_norm": 0.9203231334686279,
"learning_rate": 9.590523468173011e-06,
"loss": 0.6317,
"step": 2460
},
{
"epoch": 0.16,
"grad_norm": 0.9199931621551514,
"learning_rate": 9.590116731296646e-06,
"loss": 0.578,
"step": 2461
},
{
"epoch": 0.16,
"grad_norm": 0.8737656474113464,
"learning_rate": 9.589709801146432e-06,
"loss": 0.6047,
"step": 2462
},
{
"epoch": 0.16,
"grad_norm": 0.8943954706192017,
"learning_rate": 9.589302677739506e-06,
"loss": 0.6203,
"step": 2463
},
{
"epoch": 0.16,
"grad_norm": 0.9137763381004333,
"learning_rate": 9.588895361093009e-06,
"loss": 0.6568,
"step": 2464
},
{
"epoch": 0.16,
"grad_norm": 0.9582598805427551,
"learning_rate": 9.588487851224091e-06,
"loss": 0.6377,
"step": 2465
},
{
"epoch": 0.16,
"grad_norm": 0.9316682815551758,
"learning_rate": 9.588080148149912e-06,
"loss": 0.6544,
"step": 2466
},
{
"epoch": 0.16,
"grad_norm": 1.0368373394012451,
"learning_rate": 9.587672251887639e-06,
"loss": 0.7225,
"step": 2467
},
{
"epoch": 0.16,
"grad_norm": 0.8449527621269226,
"learning_rate": 9.587264162454447e-06,
"loss": 0.5722,
"step": 2468
},
{
"epoch": 0.16,
"grad_norm": 0.8870164155960083,
"learning_rate": 9.586855879867519e-06,
"loss": 0.6279,
"step": 2469
},
{
"epoch": 0.16,
"grad_norm": 0.9462539553642273,
"learning_rate": 9.586447404144046e-06,
"loss": 0.6945,
"step": 2470
},
{
"epoch": 0.16,
"grad_norm": 0.9636325240135193,
"learning_rate": 9.58603873530123e-06,
"loss": 0.626,
"step": 2471
},
{
"epoch": 0.16,
"grad_norm": 0.8742256164550781,
"learning_rate": 9.585629873356273e-06,
"loss": 0.5091,
"step": 2472
},
{
"epoch": 0.16,
"grad_norm": 0.937807559967041,
"learning_rate": 9.585220818326395e-06,
"loss": 0.6507,
"step": 2473
},
{
"epoch": 0.16,
"grad_norm": 0.8809791207313538,
"learning_rate": 9.58481157022882e-06,
"loss": 0.6041,
"step": 2474
},
{
"epoch": 0.16,
"grad_norm": 0.9614810347557068,
"learning_rate": 9.584402129080779e-06,
"loss": 0.6466,
"step": 2475
},
{
"epoch": 0.16,
"grad_norm": 0.8808587789535522,
"learning_rate": 9.583992494899513e-06,
"loss": 0.6032,
"step": 2476
},
{
"epoch": 0.16,
"grad_norm": 0.9078788161277771,
"learning_rate": 9.583582667702269e-06,
"loss": 0.6371,
"step": 2477
},
{
"epoch": 0.16,
"grad_norm": 0.8558230996131897,
"learning_rate": 9.583172647506305e-06,
"loss": 0.6056,
"step": 2478
},
{
"epoch": 0.16,
"grad_norm": 0.8734446167945862,
"learning_rate": 9.582762434328883e-06,
"loss": 0.6081,
"step": 2479
},
{
"epoch": 0.16,
"grad_norm": 0.8628250360488892,
"learning_rate": 9.582352028187278e-06,
"loss": 0.665,
"step": 2480
},
{
"epoch": 0.16,
"grad_norm": 0.8482995629310608,
"learning_rate": 9.581941429098769e-06,
"loss": 0.5588,
"step": 2481
},
{
"epoch": 0.16,
"grad_norm": 0.9192953109741211,
"learning_rate": 9.581530637080647e-06,
"loss": 0.6463,
"step": 2482
},
{
"epoch": 0.16,
"grad_norm": 0.9629647135734558,
"learning_rate": 9.581119652150208e-06,
"loss": 0.6296,
"step": 2483
},
{
"epoch": 0.16,
"grad_norm": 0.9503898620605469,
"learning_rate": 9.580708474324755e-06,
"loss": 0.6034,
"step": 2484
},
{
"epoch": 0.16,
"grad_norm": 0.8851401209831238,
"learning_rate": 9.580297103621605e-06,
"loss": 0.6284,
"step": 2485
},
{
"epoch": 0.16,
"grad_norm": 0.9362215399742126,
"learning_rate": 9.579885540058079e-06,
"loss": 0.6451,
"step": 2486
},
{
"epoch": 0.16,
"grad_norm": 0.8985670804977417,
"learning_rate": 9.579473783651503e-06,
"loss": 0.6017,
"step": 2487
},
{
"epoch": 0.16,
"grad_norm": 0.977086067199707,
"learning_rate": 9.579061834419217e-06,
"loss": 0.6823,
"step": 2488
},
{
"epoch": 0.16,
"grad_norm": 0.9364843368530273,
"learning_rate": 9.578649692378567e-06,
"loss": 0.706,
"step": 2489
},
{
"epoch": 0.16,
"grad_norm": 0.9483008980751038,
"learning_rate": 9.578237357546907e-06,
"loss": 0.6172,
"step": 2490
},
{
"epoch": 0.16,
"grad_norm": 0.9181289672851562,
"learning_rate": 9.577824829941597e-06,
"loss": 0.6565,
"step": 2491
},
{
"epoch": 0.16,
"grad_norm": 0.9168728590011597,
"learning_rate": 9.577412109580009e-06,
"loss": 0.6018,
"step": 2492
},
{
"epoch": 0.16,
"grad_norm": 0.9681271910667419,
"learning_rate": 9.57699919647952e-06,
"loss": 0.6707,
"step": 2493
},
{
"epoch": 0.16,
"grad_norm": 1.0229047536849976,
"learning_rate": 9.576586090657519e-06,
"loss": 0.6503,
"step": 2494
},
{
"epoch": 0.16,
"grad_norm": 0.9658745527267456,
"learning_rate": 9.576172792131397e-06,
"loss": 0.6538,
"step": 2495
},
{
"epoch": 0.16,
"grad_norm": 0.9022778272628784,
"learning_rate": 9.57575930091856e-06,
"loss": 0.6287,
"step": 2496
},
{
"epoch": 0.16,
"grad_norm": 0.9274746179580688,
"learning_rate": 9.575345617036413e-06,
"loss": 0.659,
"step": 2497
},
{
"epoch": 0.16,
"grad_norm": 0.8899304270744324,
"learning_rate": 9.574931740502383e-06,
"loss": 0.6294,
"step": 2498
},
{
"epoch": 0.16,
"grad_norm": 1.072940468788147,
"learning_rate": 9.57451767133389e-06,
"loss": 0.6603,
"step": 2499
},
{
"epoch": 0.16,
"grad_norm": 0.8845842480659485,
"learning_rate": 9.57410340954837e-06,
"loss": 0.6408,
"step": 2500
},
{
"epoch": 0.16,
"grad_norm": 0.8758795857429504,
"learning_rate": 9.57368895516327e-06,
"loss": 0.6419,
"step": 2501
},
{
"epoch": 0.16,
"grad_norm": 0.9652571082115173,
"learning_rate": 9.573274308196037e-06,
"loss": 0.6189,
"step": 2502
},
{
"epoch": 0.16,
"grad_norm": 0.8658424615859985,
"learning_rate": 9.572859468664133e-06,
"loss": 0.5963,
"step": 2503
},
{
"epoch": 0.16,
"grad_norm": 0.9083049893379211,
"learning_rate": 9.572444436585025e-06,
"loss": 0.6744,
"step": 2504
},
{
"epoch": 0.16,
"grad_norm": 0.8568194508552551,
"learning_rate": 9.572029211976189e-06,
"loss": 0.6413,
"step": 2505
},
{
"epoch": 0.16,
"grad_norm": 0.8805359601974487,
"learning_rate": 9.571613794855105e-06,
"loss": 0.6408,
"step": 2506
},
{
"epoch": 0.16,
"grad_norm": 0.9113273620605469,
"learning_rate": 9.57119818523927e-06,
"loss": 0.6041,
"step": 2507
},
{
"epoch": 0.16,
"grad_norm": 0.925477147102356,
"learning_rate": 9.570782383146183e-06,
"loss": 0.6571,
"step": 2508
},
{
"epoch": 0.16,
"grad_norm": 1.012748122215271,
"learning_rate": 9.570366388593347e-06,
"loss": 0.6822,
"step": 2509
},
{
"epoch": 0.16,
"grad_norm": 1.0008292198181152,
"learning_rate": 9.569950201598283e-06,
"loss": 0.6183,
"step": 2510
},
{
"epoch": 0.16,
"grad_norm": 0.8939400911331177,
"learning_rate": 9.569533822178513e-06,
"loss": 0.6556,
"step": 2511
},
{
"epoch": 0.16,
"grad_norm": 0.8361603021621704,
"learning_rate": 9.569117250351571e-06,
"loss": 0.6179,
"step": 2512
},
{
"epoch": 0.16,
"grad_norm": 0.9382283687591553,
"learning_rate": 9.568700486134996e-06,
"loss": 0.6307,
"step": 2513
},
{
"epoch": 0.16,
"grad_norm": 0.9003825783729553,
"learning_rate": 9.568283529546336e-06,
"loss": 0.5918,
"step": 2514
},
{
"epoch": 0.16,
"grad_norm": 0.9097765684127808,
"learning_rate": 9.56786638060315e-06,
"loss": 0.6467,
"step": 2515
},
{
"epoch": 0.16,
"grad_norm": 0.938727080821991,
"learning_rate": 9.567449039323e-06,
"loss": 0.6822,
"step": 2516
},
{
"epoch": 0.16,
"grad_norm": 0.8862230181694031,
"learning_rate": 9.56703150572346e-06,
"loss": 0.6319,
"step": 2517
},
{
"epoch": 0.16,
"grad_norm": 0.8898985981941223,
"learning_rate": 9.56661377982211e-06,
"loss": 0.6129,
"step": 2518
},
{
"epoch": 0.16,
"grad_norm": 0.9016578197479248,
"learning_rate": 9.566195861636542e-06,
"loss": 0.668,
"step": 2519
},
{
"epoch": 0.16,
"grad_norm": 0.8894520401954651,
"learning_rate": 9.56577775118435e-06,
"loss": 0.6323,
"step": 2520
},
{
"epoch": 0.16,
"grad_norm": 0.9632962346076965,
"learning_rate": 9.56535944848314e-06,
"loss": 0.7104,
"step": 2521
},
{
"epoch": 0.16,
"grad_norm": 0.8559346199035645,
"learning_rate": 9.564940953550525e-06,
"loss": 0.6451,
"step": 2522
},
{
"epoch": 0.16,
"grad_norm": 0.9069300293922424,
"learning_rate": 9.564522266404127e-06,
"loss": 0.6152,
"step": 2523
},
{
"epoch": 0.16,
"grad_norm": 0.9622822403907776,
"learning_rate": 9.564103387061575e-06,
"loss": 0.5734,
"step": 2524
},
{
"epoch": 0.16,
"grad_norm": 0.9601327776908875,
"learning_rate": 9.563684315540507e-06,
"loss": 0.6096,
"step": 2525
},
{
"epoch": 0.16,
"grad_norm": 0.905097246170044,
"learning_rate": 9.563265051858569e-06,
"loss": 0.6449,
"step": 2526
},
{
"epoch": 0.16,
"grad_norm": 0.9115608334541321,
"learning_rate": 9.562845596033413e-06,
"loss": 0.6879,
"step": 2527
},
{
"epoch": 0.16,
"grad_norm": 0.9223030209541321,
"learning_rate": 9.562425948082702e-06,
"loss": 0.6029,
"step": 2528
},
{
"epoch": 0.16,
"grad_norm": 0.8907862901687622,
"learning_rate": 9.562006108024106e-06,
"loss": 0.6018,
"step": 2529
},
{
"epoch": 0.16,
"grad_norm": 0.9722427129745483,
"learning_rate": 9.561586075875304e-06,
"loss": 0.649,
"step": 2530
},
{
"epoch": 0.16,
"grad_norm": 0.9734516739845276,
"learning_rate": 9.56116585165398e-06,
"loss": 0.595,
"step": 2531
},
{
"epoch": 0.16,
"grad_norm": 0.9580360651016235,
"learning_rate": 9.560745435377828e-06,
"loss": 0.604,
"step": 2532
},
{
"epoch": 0.16,
"grad_norm": 0.8849531412124634,
"learning_rate": 9.560324827064553e-06,
"loss": 0.6313,
"step": 2533
},
{
"epoch": 0.16,
"grad_norm": 0.8849808573722839,
"learning_rate": 9.559904026731862e-06,
"loss": 0.5895,
"step": 2534
},
{
"epoch": 0.16,
"grad_norm": 0.8286584615707397,
"learning_rate": 9.559483034397477e-06,
"loss": 0.6168,
"step": 2535
},
{
"epoch": 0.16,
"grad_norm": 0.8422954678535461,
"learning_rate": 9.559061850079121e-06,
"loss": 0.5688,
"step": 2536
},
{
"epoch": 0.16,
"grad_norm": 0.9304640293121338,
"learning_rate": 9.558640473794533e-06,
"loss": 0.5911,
"step": 2537
},
{
"epoch": 0.16,
"grad_norm": 0.9410046339035034,
"learning_rate": 9.558218905561452e-06,
"loss": 0.6099,
"step": 2538
},
{
"epoch": 0.16,
"grad_norm": 0.8600730895996094,
"learning_rate": 9.557797145397629e-06,
"loss": 0.635,
"step": 2539
},
{
"epoch": 0.16,
"grad_norm": 0.923870325088501,
"learning_rate": 9.557375193320824e-06,
"loss": 0.6513,
"step": 2540
},
{
"epoch": 0.16,
"grad_norm": 0.9524445533752441,
"learning_rate": 9.556953049348803e-06,
"loss": 0.6036,
"step": 2541
},
{
"epoch": 0.16,
"grad_norm": 0.945360004901886,
"learning_rate": 9.556530713499341e-06,
"loss": 0.6471,
"step": 2542
},
{
"epoch": 0.16,
"grad_norm": 1.020447850227356,
"learning_rate": 9.556108185790223e-06,
"loss": 0.7046,
"step": 2543
},
{
"epoch": 0.16,
"grad_norm": 0.9810319542884827,
"learning_rate": 9.55568546623924e-06,
"loss": 0.6746,
"step": 2544
},
{
"epoch": 0.16,
"grad_norm": 0.9337319135665894,
"learning_rate": 9.555262554864188e-06,
"loss": 0.6229,
"step": 2545
},
{
"epoch": 0.16,
"grad_norm": 0.890835165977478,
"learning_rate": 9.554839451682876e-06,
"loss": 0.5636,
"step": 2546
},
{
"epoch": 0.16,
"grad_norm": 0.8403000831604004,
"learning_rate": 9.554416156713121e-06,
"loss": 0.6144,
"step": 2547
},
{
"epoch": 0.16,
"grad_norm": 0.8973768353462219,
"learning_rate": 9.553992669972744e-06,
"loss": 0.6128,
"step": 2548
},
{
"epoch": 0.16,
"grad_norm": 0.912047803401947,
"learning_rate": 9.55356899147958e-06,
"loss": 0.6295,
"step": 2549
},
{
"epoch": 0.16,
"grad_norm": 0.8875672817230225,
"learning_rate": 9.553145121251465e-06,
"loss": 0.6375,
"step": 2550
},
{
"epoch": 0.16,
"grad_norm": 0.8986533284187317,
"learning_rate": 9.552721059306248e-06,
"loss": 0.6332,
"step": 2551
},
{
"epoch": 0.16,
"grad_norm": 0.8964718580245972,
"learning_rate": 9.552296805661787e-06,
"loss": 0.6369,
"step": 2552
},
{
"epoch": 0.16,
"grad_norm": 0.9571990370750427,
"learning_rate": 9.551872360335941e-06,
"loss": 0.6474,
"step": 2553
},
{
"epoch": 0.16,
"grad_norm": 0.927249550819397,
"learning_rate": 9.551447723346587e-06,
"loss": 0.624,
"step": 2554
},
{
"epoch": 0.16,
"grad_norm": 0.9312215447425842,
"learning_rate": 9.5510228947116e-06,
"loss": 0.6383,
"step": 2555
},
{
"epoch": 0.16,
"grad_norm": 0.9223430156707764,
"learning_rate": 9.550597874448874e-06,
"loss": 0.6332,
"step": 2556
},
{
"epoch": 0.16,
"grad_norm": 0.8620796799659729,
"learning_rate": 9.5501726625763e-06,
"loss": 0.6429,
"step": 2557
},
{
"epoch": 0.16,
"grad_norm": 0.8788149356842041,
"learning_rate": 9.549747259111786e-06,
"loss": 0.6188,
"step": 2558
},
{
"epoch": 0.16,
"grad_norm": 0.9338142275810242,
"learning_rate": 9.54932166407324e-06,
"loss": 0.6234,
"step": 2559
},
{
"epoch": 0.16,
"grad_norm": 0.8641449213027954,
"learning_rate": 9.548895877478585e-06,
"loss": 0.6202,
"step": 2560
},
{
"epoch": 0.16,
"grad_norm": 0.9130368828773499,
"learning_rate": 9.54846989934575e-06,
"loss": 0.6636,
"step": 2561
},
{
"epoch": 0.16,
"grad_norm": 0.9087523818016052,
"learning_rate": 9.54804372969267e-06,
"loss": 0.6419,
"step": 2562
},
{
"epoch": 0.16,
"grad_norm": 0.8906131982803345,
"learning_rate": 9.54761736853729e-06,
"loss": 0.5957,
"step": 2563
},
{
"epoch": 0.16,
"grad_norm": 0.8853945732116699,
"learning_rate": 9.547190815897563e-06,
"loss": 0.5888,
"step": 2564
},
{
"epoch": 0.16,
"grad_norm": 0.951070249080658,
"learning_rate": 9.54676407179145e-06,
"loss": 0.6681,
"step": 2565
},
{
"epoch": 0.16,
"grad_norm": 0.9170838594436646,
"learning_rate": 9.546337136236916e-06,
"loss": 0.6224,
"step": 2566
},
{
"epoch": 0.16,
"grad_norm": 0.955334484577179,
"learning_rate": 9.545910009251945e-06,
"loss": 0.6488,
"step": 2567
},
{
"epoch": 0.16,
"grad_norm": 0.8778351545333862,
"learning_rate": 9.545482690854513e-06,
"loss": 0.6396,
"step": 2568
},
{
"epoch": 0.16,
"grad_norm": 0.8910854458808899,
"learning_rate": 9.545055181062621e-06,
"loss": 0.6397,
"step": 2569
},
{
"epoch": 0.16,
"grad_norm": 1.0262346267700195,
"learning_rate": 9.544627479894264e-06,
"loss": 0.6648,
"step": 2570
},
{
"epoch": 0.16,
"grad_norm": 0.881415843963623,
"learning_rate": 9.544199587367455e-06,
"loss": 0.6112,
"step": 2571
},
{
"epoch": 0.16,
"grad_norm": 0.8958014249801636,
"learning_rate": 9.54377150350021e-06,
"loss": 0.6493,
"step": 2572
},
{
"epoch": 0.16,
"grad_norm": 0.9083918929100037,
"learning_rate": 9.543343228310551e-06,
"loss": 0.578,
"step": 2573
},
{
"epoch": 0.16,
"grad_norm": 0.9322221279144287,
"learning_rate": 9.542914761816518e-06,
"loss": 0.6487,
"step": 2574
},
{
"epoch": 0.16,
"grad_norm": 0.914716362953186,
"learning_rate": 9.542486104036143e-06,
"loss": 0.6269,
"step": 2575
},
{
"epoch": 0.16,
"grad_norm": 0.9125852584838867,
"learning_rate": 9.542057254987485e-06,
"loss": 0.6308,
"step": 2576
},
{
"epoch": 0.16,
"grad_norm": 0.9945306777954102,
"learning_rate": 9.541628214688595e-06,
"loss": 0.6203,
"step": 2577
},
{
"epoch": 0.16,
"grad_norm": 0.9009057283401489,
"learning_rate": 9.541198983157538e-06,
"loss": 0.6603,
"step": 2578
},
{
"epoch": 0.16,
"grad_norm": 0.8918367028236389,
"learning_rate": 9.54076956041239e-06,
"loss": 0.6313,
"step": 2579
},
{
"epoch": 0.16,
"grad_norm": 0.8985729217529297,
"learning_rate": 9.540339946471235e-06,
"loss": 0.6205,
"step": 2580
},
{
"epoch": 0.16,
"grad_norm": 0.8877277970314026,
"learning_rate": 9.539910141352156e-06,
"loss": 0.6364,
"step": 2581
},
{
"epoch": 0.16,
"grad_norm": 0.9015381336212158,
"learning_rate": 9.539480145073257e-06,
"loss": 0.5959,
"step": 2582
},
{
"epoch": 0.16,
"grad_norm": 0.9096758365631104,
"learning_rate": 9.53904995765264e-06,
"loss": 0.6062,
"step": 2583
},
{
"epoch": 0.16,
"grad_norm": 0.9512980580329895,
"learning_rate": 9.538619579108417e-06,
"loss": 0.6782,
"step": 2584
},
{
"epoch": 0.16,
"grad_norm": 0.9591136574745178,
"learning_rate": 9.538189009458715e-06,
"loss": 0.6716,
"step": 2585
},
{
"epoch": 0.16,
"grad_norm": 0.9070512056350708,
"learning_rate": 9.53775824872166e-06,
"loss": 0.5908,
"step": 2586
},
{
"epoch": 0.16,
"grad_norm": 0.8964409232139587,
"learning_rate": 9.53732729691539e-06,
"loss": 0.622,
"step": 2587
},
{
"epoch": 0.16,
"grad_norm": 0.9467551708221436,
"learning_rate": 9.536896154058053e-06,
"loss": 0.6137,
"step": 2588
},
{
"epoch": 0.16,
"grad_norm": 0.8990939259529114,
"learning_rate": 9.536464820167804e-06,
"loss": 0.6319,
"step": 2589
},
{
"epoch": 0.16,
"grad_norm": 0.8276720643043518,
"learning_rate": 9.536033295262799e-06,
"loss": 0.5556,
"step": 2590
},
{
"epoch": 0.16,
"grad_norm": 0.8583798408508301,
"learning_rate": 9.535601579361214e-06,
"loss": 0.5774,
"step": 2591
},
{
"epoch": 0.16,
"grad_norm": 0.9028250575065613,
"learning_rate": 9.535169672481222e-06,
"loss": 0.629,
"step": 2592
},
{
"epoch": 0.16,
"grad_norm": 0.9669902920722961,
"learning_rate": 9.534737574641014e-06,
"loss": 0.6514,
"step": 2593
},
{
"epoch": 0.16,
"grad_norm": 0.9334651827812195,
"learning_rate": 9.53430528585878e-06,
"loss": 0.6385,
"step": 2594
},
{
"epoch": 0.16,
"grad_norm": 0.8801825046539307,
"learning_rate": 9.533872806152727e-06,
"loss": 0.6043,
"step": 2595
},
{
"epoch": 0.16,
"grad_norm": 0.9169769883155823,
"learning_rate": 9.533440135541059e-06,
"loss": 0.6202,
"step": 2596
},
{
"epoch": 0.16,
"grad_norm": 0.90007483959198,
"learning_rate": 9.533007274042e-06,
"loss": 0.6977,
"step": 2597
},
{
"epoch": 0.16,
"grad_norm": 0.887588620185852,
"learning_rate": 9.532574221673772e-06,
"loss": 0.6228,
"step": 2598
},
{
"epoch": 0.16,
"grad_norm": 0.9043447971343994,
"learning_rate": 9.532140978454614e-06,
"loss": 0.6192,
"step": 2599
},
{
"epoch": 0.16,
"grad_norm": 0.9651160836219788,
"learning_rate": 9.531707544402762e-06,
"loss": 0.6675,
"step": 2600
},
{
"epoch": 0.16,
"grad_norm": 0.9440380334854126,
"learning_rate": 9.531273919536473e-06,
"loss": 0.6294,
"step": 2601
},
{
"epoch": 0.16,
"grad_norm": 0.9147106409072876,
"learning_rate": 9.530840103874001e-06,
"loss": 0.6483,
"step": 2602
},
{
"epoch": 0.16,
"grad_norm": 0.9056714177131653,
"learning_rate": 9.530406097433615e-06,
"loss": 0.5734,
"step": 2603
},
{
"epoch": 0.16,
"grad_norm": 0.9497922658920288,
"learning_rate": 9.529971900233587e-06,
"loss": 0.5915,
"step": 2604
},
{
"epoch": 0.17,
"grad_norm": 0.8961224555969238,
"learning_rate": 9.529537512292201e-06,
"loss": 0.6239,
"step": 2605
},
{
"epoch": 0.17,
"grad_norm": 0.9149653315544128,
"learning_rate": 9.529102933627747e-06,
"loss": 0.6477,
"step": 2606
},
{
"epoch": 0.17,
"grad_norm": 0.904569685459137,
"learning_rate": 9.528668164258525e-06,
"loss": 0.6361,
"step": 2607
},
{
"epoch": 0.17,
"grad_norm": 0.8962168097496033,
"learning_rate": 9.528233204202842e-06,
"loss": 0.6214,
"step": 2608
},
{
"epoch": 0.17,
"grad_norm": 0.891830325126648,
"learning_rate": 9.527798053479009e-06,
"loss": 0.5854,
"step": 2609
},
{
"epoch": 0.17,
"grad_norm": 0.9612575173377991,
"learning_rate": 9.527362712105353e-06,
"loss": 0.6016,
"step": 2610
},
{
"epoch": 0.17,
"grad_norm": 0.9431421756744385,
"learning_rate": 9.5269271801002e-06,
"loss": 0.6032,
"step": 2611
},
{
"epoch": 0.17,
"grad_norm": 0.8791323304176331,
"learning_rate": 9.526491457481895e-06,
"loss": 0.6002,
"step": 2612
},
{
"epoch": 0.17,
"grad_norm": 0.9468672275543213,
"learning_rate": 9.526055544268778e-06,
"loss": 0.6101,
"step": 2613
},
{
"epoch": 0.17,
"grad_norm": 0.8586993217468262,
"learning_rate": 9.525619440479209e-06,
"loss": 0.5971,
"step": 2614
},
{
"epoch": 0.17,
"grad_norm": 0.88875812292099,
"learning_rate": 9.525183146131549e-06,
"loss": 0.6711,
"step": 2615
},
{
"epoch": 0.17,
"grad_norm": 0.9012202620506287,
"learning_rate": 9.524746661244166e-06,
"loss": 0.6357,
"step": 2616
},
{
"epoch": 0.17,
"grad_norm": 0.9038097858428955,
"learning_rate": 9.524309985835444e-06,
"loss": 0.6106,
"step": 2617
},
{
"epoch": 0.17,
"grad_norm": 0.9143322706222534,
"learning_rate": 9.523873119923768e-06,
"loss": 0.5951,
"step": 2618
},
{
"epoch": 0.17,
"grad_norm": 0.9046504497528076,
"learning_rate": 9.523436063527531e-06,
"loss": 0.5902,
"step": 2619
},
{
"epoch": 0.17,
"grad_norm": 0.959321916103363,
"learning_rate": 9.522998816665137e-06,
"loss": 0.6532,
"step": 2620
},
{
"epoch": 0.17,
"grad_norm": 0.8277800679206848,
"learning_rate": 9.522561379354997e-06,
"loss": 0.6249,
"step": 2621
},
{
"epoch": 0.17,
"grad_norm": 0.9031876921653748,
"learning_rate": 9.522123751615532e-06,
"loss": 0.6575,
"step": 2622
},
{
"epoch": 0.17,
"grad_norm": 0.9128404855728149,
"learning_rate": 9.521685933465166e-06,
"loss": 0.6547,
"step": 2623
},
{
"epoch": 0.17,
"grad_norm": 0.8987665772438049,
"learning_rate": 9.521247924922334e-06,
"loss": 0.6212,
"step": 2624
},
{
"epoch": 0.17,
"grad_norm": 0.944159209728241,
"learning_rate": 9.520809726005481e-06,
"loss": 0.5963,
"step": 2625
},
{
"epoch": 0.17,
"grad_norm": 0.9575842022895813,
"learning_rate": 9.52037133673306e-06,
"loss": 0.5637,
"step": 2626
},
{
"epoch": 0.17,
"grad_norm": 0.8398919105529785,
"learning_rate": 9.519932757123523e-06,
"loss": 0.664,
"step": 2627
},
{
"epoch": 0.17,
"grad_norm": 0.9531906843185425,
"learning_rate": 9.519493987195343e-06,
"loss": 0.5932,
"step": 2628
},
{
"epoch": 0.17,
"grad_norm": 0.9427643418312073,
"learning_rate": 9.519055026966995e-06,
"loss": 0.5979,
"step": 2629
},
{
"epoch": 0.17,
"grad_norm": 0.9445648193359375,
"learning_rate": 9.518615876456958e-06,
"loss": 0.6406,
"step": 2630
},
{
"epoch": 0.17,
"grad_norm": 0.8915479183197021,
"learning_rate": 9.518176535683727e-06,
"loss": 0.5887,
"step": 2631
},
{
"epoch": 0.17,
"grad_norm": 0.9278690218925476,
"learning_rate": 9.5177370046658e-06,
"loss": 0.6604,
"step": 2632
},
{
"epoch": 0.17,
"grad_norm": 0.9619773626327515,
"learning_rate": 9.517297283421681e-06,
"loss": 0.6622,
"step": 2633
},
{
"epoch": 0.17,
"grad_norm": 0.9478781819343567,
"learning_rate": 9.51685737196989e-06,
"loss": 0.6336,
"step": 2634
},
{
"epoch": 0.17,
"grad_norm": 0.8679977059364319,
"learning_rate": 9.516417270328948e-06,
"loss": 0.6031,
"step": 2635
},
{
"epoch": 0.17,
"grad_norm": 0.9029505252838135,
"learning_rate": 9.515976978517387e-06,
"loss": 0.6204,
"step": 2636
},
{
"epoch": 0.17,
"grad_norm": 0.8872044086456299,
"learning_rate": 9.515536496553744e-06,
"loss": 0.578,
"step": 2637
},
{
"epoch": 0.17,
"grad_norm": 0.9961317777633667,
"learning_rate": 9.515095824456568e-06,
"loss": 0.6484,
"step": 2638
},
{
"epoch": 0.17,
"grad_norm": 0.8571626543998718,
"learning_rate": 9.514654962244414e-06,
"loss": 0.6417,
"step": 2639
},
{
"epoch": 0.17,
"grad_norm": 0.8865385055541992,
"learning_rate": 9.514213909935843e-06,
"loss": 0.5677,
"step": 2640
},
{
"epoch": 0.17,
"grad_norm": 0.9392569661140442,
"learning_rate": 9.51377266754943e-06,
"loss": 0.6493,
"step": 2641
},
{
"epoch": 0.17,
"grad_norm": 0.9384260773658752,
"learning_rate": 9.513331235103751e-06,
"loss": 0.6117,
"step": 2642
},
{
"epoch": 0.17,
"grad_norm": 1.0064356327056885,
"learning_rate": 9.512889612617397e-06,
"loss": 0.6214,
"step": 2643
},
{
"epoch": 0.17,
"grad_norm": 0.8559515476226807,
"learning_rate": 9.512447800108958e-06,
"loss": 0.6171,
"step": 2644
},
{
"epoch": 0.17,
"grad_norm": 0.9168458580970764,
"learning_rate": 9.512005797597042e-06,
"loss": 0.6406,
"step": 2645
},
{
"epoch": 0.17,
"grad_norm": 0.9505908489227295,
"learning_rate": 9.511563605100255e-06,
"loss": 0.63,
"step": 2646
},
{
"epoch": 0.17,
"grad_norm": 0.9313047528266907,
"learning_rate": 9.511121222637222e-06,
"loss": 0.6543,
"step": 2647
},
{
"epoch": 0.17,
"grad_norm": 0.8740178346633911,
"learning_rate": 9.510678650226567e-06,
"loss": 0.5734,
"step": 2648
},
{
"epoch": 0.17,
"grad_norm": 0.9065948128700256,
"learning_rate": 9.510235887886923e-06,
"loss": 0.6048,
"step": 2649
},
{
"epoch": 0.17,
"grad_norm": 0.9390092492103577,
"learning_rate": 9.509792935636939e-06,
"loss": 0.5976,
"step": 2650
},
{
"epoch": 0.17,
"grad_norm": 0.9297692179679871,
"learning_rate": 9.50934979349526e-06,
"loss": 0.5868,
"step": 2651
},
{
"epoch": 0.17,
"grad_norm": 0.9775800704956055,
"learning_rate": 9.508906461480549e-06,
"loss": 0.6938,
"step": 2652
},
{
"epoch": 0.17,
"grad_norm": 0.934540867805481,
"learning_rate": 9.508462939611473e-06,
"loss": 0.6,
"step": 2653
},
{
"epoch": 0.17,
"grad_norm": 0.9152988195419312,
"learning_rate": 9.508019227906706e-06,
"loss": 0.6573,
"step": 2654
},
{
"epoch": 0.17,
"grad_norm": 0.9159802794456482,
"learning_rate": 9.507575326384932e-06,
"loss": 0.5607,
"step": 2655
},
{
"epoch": 0.17,
"grad_norm": 0.9005085229873657,
"learning_rate": 9.507131235064842e-06,
"loss": 0.6402,
"step": 2656
},
{
"epoch": 0.17,
"grad_norm": 0.9148140549659729,
"learning_rate": 9.506686953965134e-06,
"loss": 0.6254,
"step": 2657
},
{
"epoch": 0.17,
"grad_norm": 0.8619657754898071,
"learning_rate": 9.506242483104517e-06,
"loss": 0.534,
"step": 2658
},
{
"epoch": 0.17,
"grad_norm": 0.8992459774017334,
"learning_rate": 9.505797822501704e-06,
"loss": 0.6414,
"step": 2659
},
{
"epoch": 0.17,
"grad_norm": 0.9422406554222107,
"learning_rate": 9.505352972175419e-06,
"loss": 0.6557,
"step": 2660
},
{
"epoch": 0.17,
"grad_norm": 0.9567902088165283,
"learning_rate": 9.504907932144394e-06,
"loss": 0.6674,
"step": 2661
},
{
"epoch": 0.17,
"grad_norm": 0.9111477136611938,
"learning_rate": 9.504462702427369e-06,
"loss": 0.634,
"step": 2662
},
{
"epoch": 0.17,
"grad_norm": 0.9020829796791077,
"learning_rate": 9.504017283043087e-06,
"loss": 0.6443,
"step": 2663
},
{
"epoch": 0.17,
"grad_norm": 0.9128588438034058,
"learning_rate": 9.503571674010305e-06,
"loss": 0.651,
"step": 2664
},
{
"epoch": 0.17,
"grad_norm": 0.908065676689148,
"learning_rate": 9.503125875347789e-06,
"loss": 0.6225,
"step": 2665
},
{
"epoch": 0.17,
"grad_norm": 0.9279728531837463,
"learning_rate": 9.502679887074306e-06,
"loss": 0.6425,
"step": 2666
},
{
"epoch": 0.17,
"grad_norm": 0.8896051645278931,
"learning_rate": 9.502233709208637e-06,
"loss": 0.6823,
"step": 2667
},
{
"epoch": 0.17,
"grad_norm": 0.9090619087219238,
"learning_rate": 9.50178734176957e-06,
"loss": 0.5903,
"step": 2668
},
{
"epoch": 0.17,
"grad_norm": 0.8844740986824036,
"learning_rate": 9.501340784775896e-06,
"loss": 0.6276,
"step": 2669
},
{
"epoch": 0.17,
"grad_norm": 0.9212251901626587,
"learning_rate": 9.500894038246424e-06,
"loss": 0.5796,
"step": 2670
},
{
"epoch": 0.17,
"grad_norm": 0.9225980639457703,
"learning_rate": 9.50044710219996e-06,
"loss": 0.6326,
"step": 2671
},
{
"epoch": 0.17,
"grad_norm": 0.9283084869384766,
"learning_rate": 9.499999976655324e-06,
"loss": 0.6165,
"step": 2672
},
{
"epoch": 0.17,
"grad_norm": 0.8648502826690674,
"learning_rate": 9.499552661631342e-06,
"loss": 0.6137,
"step": 2673
},
{
"epoch": 0.17,
"grad_norm": 0.88034588098526,
"learning_rate": 9.49910515714685e-06,
"loss": 0.594,
"step": 2674
},
{
"epoch": 0.17,
"grad_norm": 0.841262698173523,
"learning_rate": 9.498657463220694e-06,
"loss": 0.5953,
"step": 2675
},
{
"epoch": 0.17,
"grad_norm": 0.9340731501579285,
"learning_rate": 9.49820957987172e-06,
"loss": 0.6236,
"step": 2676
},
{
"epoch": 0.17,
"grad_norm": 0.898252546787262,
"learning_rate": 9.49776150711879e-06,
"loss": 0.5813,
"step": 2677
},
{
"epoch": 0.17,
"grad_norm": 0.8751718997955322,
"learning_rate": 9.497313244980768e-06,
"loss": 0.5712,
"step": 2678
},
{
"epoch": 0.17,
"grad_norm": 0.8850248456001282,
"learning_rate": 9.496864793476532e-06,
"loss": 0.6464,
"step": 2679
},
{
"epoch": 0.17,
"grad_norm": 0.9821275472640991,
"learning_rate": 9.49641615262496e-06,
"loss": 0.6297,
"step": 2680
},
{
"epoch": 0.17,
"grad_norm": 0.8436826467514038,
"learning_rate": 9.49596732244495e-06,
"loss": 0.5828,
"step": 2681
},
{
"epoch": 0.17,
"grad_norm": 0.9077553749084473,
"learning_rate": 9.495518302955393e-06,
"loss": 0.6651,
"step": 2682
},
{
"epoch": 0.17,
"grad_norm": 0.9323903322219849,
"learning_rate": 9.4950690941752e-06,
"loss": 0.6516,
"step": 2683
},
{
"epoch": 0.17,
"grad_norm": 1.0304430723190308,
"learning_rate": 9.494619696123286e-06,
"loss": 0.6534,
"step": 2684
},
{
"epoch": 0.17,
"grad_norm": 0.9509037137031555,
"learning_rate": 9.49417010881857e-06,
"loss": 0.6013,
"step": 2685
},
{
"epoch": 0.17,
"grad_norm": 0.8547189831733704,
"learning_rate": 9.493720332279987e-06,
"loss": 0.5765,
"step": 2686
},
{
"epoch": 0.17,
"grad_norm": 0.9771583676338196,
"learning_rate": 9.493270366526471e-06,
"loss": 0.6383,
"step": 2687
},
{
"epoch": 0.17,
"grad_norm": 0.9149676561355591,
"learning_rate": 9.492820211576971e-06,
"loss": 0.6117,
"step": 2688
},
{
"epoch": 0.17,
"grad_norm": 0.8924671411514282,
"learning_rate": 9.492369867450444e-06,
"loss": 0.5931,
"step": 2689
},
{
"epoch": 0.17,
"grad_norm": 0.9182107448577881,
"learning_rate": 9.491919334165846e-06,
"loss": 0.6233,
"step": 2690
},
{
"epoch": 0.17,
"grad_norm": 0.9452329277992249,
"learning_rate": 9.491468611742154e-06,
"loss": 0.7153,
"step": 2691
},
{
"epoch": 0.17,
"grad_norm": 0.9435275197029114,
"learning_rate": 9.491017700198343e-06,
"loss": 0.6737,
"step": 2692
},
{
"epoch": 0.17,
"grad_norm": 0.9835942387580872,
"learning_rate": 9.490566599553399e-06,
"loss": 0.6323,
"step": 2693
},
{
"epoch": 0.17,
"grad_norm": 1.015770673751831,
"learning_rate": 9.490115309826317e-06,
"loss": 0.6106,
"step": 2694
},
{
"epoch": 0.17,
"grad_norm": 0.9940273761749268,
"learning_rate": 9.4896638310361e-06,
"loss": 0.6326,
"step": 2695
},
{
"epoch": 0.17,
"grad_norm": 0.9595569968223572,
"learning_rate": 9.489212163201758e-06,
"loss": 0.6314,
"step": 2696
},
{
"epoch": 0.17,
"grad_norm": 0.918870747089386,
"learning_rate": 9.488760306342307e-06,
"loss": 0.6369,
"step": 2697
},
{
"epoch": 0.17,
"grad_norm": 0.9247921705245972,
"learning_rate": 9.488308260476776e-06,
"loss": 0.5877,
"step": 2698
},
{
"epoch": 0.17,
"grad_norm": 0.8694366812705994,
"learning_rate": 9.487856025624196e-06,
"loss": 0.6188,
"step": 2699
},
{
"epoch": 0.17,
"grad_norm": 0.9364984631538391,
"learning_rate": 9.487403601803614e-06,
"loss": 0.5841,
"step": 2700
},
{
"epoch": 0.17,
"grad_norm": 0.8980706930160522,
"learning_rate": 9.486950989034074e-06,
"loss": 0.6324,
"step": 2701
},
{
"epoch": 0.17,
"grad_norm": 0.8469223380088806,
"learning_rate": 9.486498187334636e-06,
"loss": 0.5997,
"step": 2702
},
{
"epoch": 0.17,
"grad_norm": 0.9805670976638794,
"learning_rate": 9.48604519672437e-06,
"loss": 0.6745,
"step": 2703
},
{
"epoch": 0.17,
"grad_norm": 0.9122759103775024,
"learning_rate": 9.485592017222344e-06,
"loss": 0.6904,
"step": 2704
},
{
"epoch": 0.17,
"grad_norm": 0.9132962822914124,
"learning_rate": 9.485138648847643e-06,
"loss": 0.5926,
"step": 2705
},
{
"epoch": 0.17,
"grad_norm": 0.8468869924545288,
"learning_rate": 9.484685091619358e-06,
"loss": 0.6072,
"step": 2706
},
{
"epoch": 0.17,
"grad_norm": 0.9402836561203003,
"learning_rate": 9.484231345556582e-06,
"loss": 0.6308,
"step": 2707
},
{
"epoch": 0.17,
"grad_norm": 0.8940732479095459,
"learning_rate": 9.483777410678427e-06,
"loss": 0.63,
"step": 2708
},
{
"epoch": 0.17,
"grad_norm": 0.886562705039978,
"learning_rate": 9.483323287004001e-06,
"loss": 0.5811,
"step": 2709
},
{
"epoch": 0.17,
"grad_norm": 0.9191167950630188,
"learning_rate": 9.482868974552427e-06,
"loss": 0.6349,
"step": 2710
},
{
"epoch": 0.17,
"grad_norm": 0.936594545841217,
"learning_rate": 9.482414473342835e-06,
"loss": 0.72,
"step": 2711
},
{
"epoch": 0.17,
"grad_norm": 0.9029736518859863,
"learning_rate": 9.481959783394365e-06,
"loss": 0.6818,
"step": 2712
},
{
"epoch": 0.17,
"grad_norm": 0.9597886800765991,
"learning_rate": 9.48150490472616e-06,
"loss": 0.6462,
"step": 2713
},
{
"epoch": 0.17,
"grad_norm": 0.9007745385169983,
"learning_rate": 9.481049837357371e-06,
"loss": 0.6234,
"step": 2714
},
{
"epoch": 0.17,
"grad_norm": 0.8033143877983093,
"learning_rate": 9.480594581307164e-06,
"loss": 0.5724,
"step": 2715
},
{
"epoch": 0.17,
"grad_norm": 0.856959879398346,
"learning_rate": 9.480139136594706e-06,
"loss": 0.5977,
"step": 2716
},
{
"epoch": 0.17,
"grad_norm": 0.9320681095123291,
"learning_rate": 9.479683503239172e-06,
"loss": 0.6452,
"step": 2717
},
{
"epoch": 0.17,
"grad_norm": 0.8906647562980652,
"learning_rate": 9.479227681259751e-06,
"loss": 0.6675,
"step": 2718
},
{
"epoch": 0.17,
"grad_norm": 0.8599271774291992,
"learning_rate": 9.478771670675635e-06,
"loss": 0.6287,
"step": 2719
},
{
"epoch": 0.17,
"grad_norm": 0.8469679355621338,
"learning_rate": 9.478315471506023e-06,
"loss": 0.5967,
"step": 2720
},
{
"epoch": 0.17,
"grad_norm": 0.8832866549491882,
"learning_rate": 9.477859083770126e-06,
"loss": 0.6506,
"step": 2721
},
{
"epoch": 0.17,
"grad_norm": 0.8781976699829102,
"learning_rate": 9.477402507487162e-06,
"loss": 0.6026,
"step": 2722
},
{
"epoch": 0.17,
"grad_norm": 0.9236262440681458,
"learning_rate": 9.476945742676352e-06,
"loss": 0.5791,
"step": 2723
},
{
"epoch": 0.17,
"grad_norm": 0.9180050492286682,
"learning_rate": 9.476488789356933e-06,
"loss": 0.5972,
"step": 2724
},
{
"epoch": 0.17,
"grad_norm": 0.8968567848205566,
"learning_rate": 9.47603164754814e-06,
"loss": 0.6701,
"step": 2725
},
{
"epoch": 0.17,
"grad_norm": 0.9011199474334717,
"learning_rate": 9.47557431726923e-06,
"loss": 0.6389,
"step": 2726
},
{
"epoch": 0.17,
"grad_norm": 0.964178204536438,
"learning_rate": 9.475116798539451e-06,
"loss": 0.6804,
"step": 2727
},
{
"epoch": 0.17,
"grad_norm": 0.9103108048439026,
"learning_rate": 9.474659091378074e-06,
"loss": 0.5935,
"step": 2728
},
{
"epoch": 0.17,
"grad_norm": 0.9424949884414673,
"learning_rate": 9.474201195804367e-06,
"loss": 0.6662,
"step": 2729
},
{
"epoch": 0.17,
"grad_norm": 0.9513722658157349,
"learning_rate": 9.473743111837612e-06,
"loss": 0.6526,
"step": 2730
},
{
"epoch": 0.17,
"grad_norm": 0.9301340579986572,
"learning_rate": 9.4732848394971e-06,
"loss": 0.5824,
"step": 2731
},
{
"epoch": 0.17,
"grad_norm": 0.9112258553504944,
"learning_rate": 9.472826378802122e-06,
"loss": 0.6287,
"step": 2732
},
{
"epoch": 0.17,
"grad_norm": 0.9196444749832153,
"learning_rate": 9.472367729771987e-06,
"loss": 0.6376,
"step": 2733
},
{
"epoch": 0.17,
"grad_norm": 0.9066518545150757,
"learning_rate": 9.471908892426005e-06,
"loss": 0.6648,
"step": 2734
},
{
"epoch": 0.17,
"grad_norm": 0.8786914944648743,
"learning_rate": 9.471449866783495e-06,
"loss": 0.6161,
"step": 2735
},
{
"epoch": 0.17,
"grad_norm": 0.9169754385948181,
"learning_rate": 9.470990652863787e-06,
"loss": 0.6643,
"step": 2736
},
{
"epoch": 0.17,
"grad_norm": 0.9611136317253113,
"learning_rate": 9.470531250686216e-06,
"loss": 0.6446,
"step": 2737
},
{
"epoch": 0.17,
"grad_norm": 0.9478945732116699,
"learning_rate": 9.470071660270126e-06,
"loss": 0.6436,
"step": 2738
},
{
"epoch": 0.17,
"grad_norm": 0.8549840450286865,
"learning_rate": 9.469611881634868e-06,
"loss": 0.607,
"step": 2739
},
{
"epoch": 0.17,
"grad_norm": 0.9151300191879272,
"learning_rate": 9.469151914799803e-06,
"loss": 0.5987,
"step": 2740
},
{
"epoch": 0.17,
"grad_norm": 0.87184077501297,
"learning_rate": 9.468691759784298e-06,
"loss": 0.6307,
"step": 2741
},
{
"epoch": 0.17,
"grad_norm": 0.9251417517662048,
"learning_rate": 9.468231416607727e-06,
"loss": 0.5822,
"step": 2742
},
{
"epoch": 0.17,
"grad_norm": 0.9144605994224548,
"learning_rate": 9.467770885289477e-06,
"loss": 0.5699,
"step": 2743
},
{
"epoch": 0.17,
"grad_norm": 0.8591218590736389,
"learning_rate": 9.467310165848935e-06,
"loss": 0.6483,
"step": 2744
},
{
"epoch": 0.17,
"grad_norm": 0.8842750787734985,
"learning_rate": 9.466849258305504e-06,
"loss": 0.6478,
"step": 2745
},
{
"epoch": 0.17,
"grad_norm": 0.8982271552085876,
"learning_rate": 9.46638816267859e-06,
"loss": 0.6189,
"step": 2746
},
{
"epoch": 0.17,
"grad_norm": 1.1078075170516968,
"learning_rate": 9.465926878987609e-06,
"loss": 0.652,
"step": 2747
},
{
"epoch": 0.17,
"grad_norm": 0.9062262773513794,
"learning_rate": 9.46546540725198e-06,
"loss": 0.6205,
"step": 2748
},
{
"epoch": 0.17,
"grad_norm": 0.9785717725753784,
"learning_rate": 9.465003747491138e-06,
"loss": 0.6586,
"step": 2749
},
{
"epoch": 0.17,
"grad_norm": 0.9226608276367188,
"learning_rate": 9.464541899724522e-06,
"loss": 0.6167,
"step": 2750
},
{
"epoch": 0.17,
"grad_norm": 0.9549429416656494,
"learning_rate": 9.464079863971576e-06,
"loss": 0.6093,
"step": 2751
},
{
"epoch": 0.17,
"grad_norm": 0.9625465869903564,
"learning_rate": 9.463617640251756e-06,
"loss": 0.6058,
"step": 2752
},
{
"epoch": 0.17,
"grad_norm": 0.917473316192627,
"learning_rate": 9.463155228584526e-06,
"loss": 0.608,
"step": 2753
},
{
"epoch": 0.17,
"grad_norm": 0.969939649105072,
"learning_rate": 9.462692628989356e-06,
"loss": 0.5676,
"step": 2754
},
{
"epoch": 0.17,
"grad_norm": 0.9174929857254028,
"learning_rate": 9.462229841485723e-06,
"loss": 0.6664,
"step": 2755
},
{
"epoch": 0.17,
"grad_norm": 0.9567301273345947,
"learning_rate": 9.461766866093117e-06,
"loss": 0.6435,
"step": 2756
},
{
"epoch": 0.17,
"grad_norm": 0.8922646045684814,
"learning_rate": 9.461303702831026e-06,
"loss": 0.5949,
"step": 2757
},
{
"epoch": 0.17,
"grad_norm": 0.8556625843048096,
"learning_rate": 9.460840351718958e-06,
"loss": 0.5995,
"step": 2758
},
{
"epoch": 0.17,
"grad_norm": 0.9240930676460266,
"learning_rate": 9.46037681277642e-06,
"loss": 0.6158,
"step": 2759
},
{
"epoch": 0.17,
"grad_norm": 0.9151474833488464,
"learning_rate": 9.459913086022931e-06,
"loss": 0.6091,
"step": 2760
},
{
"epoch": 0.17,
"grad_norm": 0.937988817691803,
"learning_rate": 9.459449171478017e-06,
"loss": 0.5562,
"step": 2761
},
{
"epoch": 0.17,
"grad_norm": 0.8838707804679871,
"learning_rate": 9.458985069161212e-06,
"loss": 0.5736,
"step": 2762
},
{
"epoch": 0.18,
"grad_norm": 0.9612347483634949,
"learning_rate": 9.458520779092057e-06,
"loss": 0.5838,
"step": 2763
},
{
"epoch": 0.18,
"grad_norm": 1.0034922361373901,
"learning_rate": 9.458056301290102e-06,
"loss": 0.6895,
"step": 2764
},
{
"epoch": 0.18,
"grad_norm": 0.9068509340286255,
"learning_rate": 9.457591635774905e-06,
"loss": 0.687,
"step": 2765
},
{
"epoch": 0.18,
"grad_norm": 0.9105919599533081,
"learning_rate": 9.457126782566031e-06,
"loss": 0.6629,
"step": 2766
},
{
"epoch": 0.18,
"grad_norm": 0.9419427514076233,
"learning_rate": 9.456661741683054e-06,
"loss": 0.6553,
"step": 2767
},
{
"epoch": 0.18,
"grad_norm": 0.9317494034767151,
"learning_rate": 9.456196513145553e-06,
"loss": 0.619,
"step": 2768
},
{
"epoch": 0.18,
"grad_norm": 0.9247744679450989,
"learning_rate": 9.455731096973119e-06,
"loss": 0.6352,
"step": 2769
},
{
"epoch": 0.18,
"grad_norm": 0.9570684432983398,
"learning_rate": 9.455265493185349e-06,
"loss": 0.6674,
"step": 2770
},
{
"epoch": 0.18,
"grad_norm": 0.9092298150062561,
"learning_rate": 9.454799701801849e-06,
"loss": 0.6136,
"step": 2771
},
{
"epoch": 0.18,
"grad_norm": 0.9638829827308655,
"learning_rate": 9.45433372284223e-06,
"loss": 0.6206,
"step": 2772
},
{
"epoch": 0.18,
"grad_norm": 1.1069514751434326,
"learning_rate": 9.453867556326113e-06,
"loss": 0.6166,
"step": 2773
},
{
"epoch": 0.18,
"grad_norm": 0.958802342414856,
"learning_rate": 9.453401202273127e-06,
"loss": 0.6009,
"step": 2774
},
{
"epoch": 0.18,
"grad_norm": 0.8832184076309204,
"learning_rate": 9.45293466070291e-06,
"loss": 0.609,
"step": 2775
},
{
"epoch": 0.18,
"grad_norm": 0.9852387309074402,
"learning_rate": 9.452467931635104e-06,
"loss": 0.6633,
"step": 2776
},
{
"epoch": 0.18,
"grad_norm": 0.8827134370803833,
"learning_rate": 9.452001015089363e-06,
"loss": 0.6112,
"step": 2777
},
{
"epoch": 0.18,
"grad_norm": 0.9104273915290833,
"learning_rate": 9.451533911085346e-06,
"loss": 0.6043,
"step": 2778
},
{
"epoch": 0.18,
"grad_norm": 0.9635795950889587,
"learning_rate": 9.451066619642721e-06,
"loss": 0.628,
"step": 2779
},
{
"epoch": 0.18,
"grad_norm": 0.9080226421356201,
"learning_rate": 9.450599140781166e-06,
"loss": 0.6428,
"step": 2780
},
{
"epoch": 0.18,
"grad_norm": 0.8342934846878052,
"learning_rate": 9.450131474520364e-06,
"loss": 0.6056,
"step": 2781
},
{
"epoch": 0.18,
"grad_norm": 0.8714557886123657,
"learning_rate": 9.449663620880006e-06,
"loss": 0.6105,
"step": 2782
},
{
"epoch": 0.18,
"grad_norm": 0.8582709431648254,
"learning_rate": 9.449195579879793e-06,
"loss": 0.6117,
"step": 2783
},
{
"epoch": 0.18,
"grad_norm": 1.0167529582977295,
"learning_rate": 9.448727351539431e-06,
"loss": 0.6551,
"step": 2784
},
{
"epoch": 0.18,
"grad_norm": 0.8866241574287415,
"learning_rate": 9.448258935878635e-06,
"loss": 0.623,
"step": 2785
},
{
"epoch": 0.18,
"grad_norm": 0.9443932771682739,
"learning_rate": 9.44779033291713e-06,
"loss": 0.6456,
"step": 2786
},
{
"epoch": 0.18,
"grad_norm": 0.9517203569412231,
"learning_rate": 9.447321542674647e-06,
"loss": 0.6439,
"step": 2787
},
{
"epoch": 0.18,
"grad_norm": 0.9734207987785339,
"learning_rate": 9.446852565170928e-06,
"loss": 0.6553,
"step": 2788
},
{
"epoch": 0.18,
"grad_norm": 0.898755669593811,
"learning_rate": 9.446383400425713e-06,
"loss": 0.6615,
"step": 2789
},
{
"epoch": 0.18,
"grad_norm": 0.9627699851989746,
"learning_rate": 9.445914048458764e-06,
"loss": 0.5574,
"step": 2790
},
{
"epoch": 0.18,
"grad_norm": 0.8621180057525635,
"learning_rate": 9.445444509289838e-06,
"loss": 0.6064,
"step": 2791
},
{
"epoch": 0.18,
"grad_norm": 0.9321991205215454,
"learning_rate": 9.44497478293871e-06,
"loss": 0.6189,
"step": 2792
},
{
"epoch": 0.18,
"grad_norm": 0.9137430787086487,
"learning_rate": 9.444504869425154e-06,
"loss": 0.6378,
"step": 2793
},
{
"epoch": 0.18,
"grad_norm": 0.9660084843635559,
"learning_rate": 9.44403476876896e-06,
"loss": 0.6376,
"step": 2794
},
{
"epoch": 0.18,
"grad_norm": 0.8711713552474976,
"learning_rate": 9.443564480989924e-06,
"loss": 0.6145,
"step": 2795
},
{
"epoch": 0.18,
"grad_norm": 0.8694255352020264,
"learning_rate": 9.443094006107844e-06,
"loss": 0.6109,
"step": 2796
},
{
"epoch": 0.18,
"grad_norm": 0.9288530945777893,
"learning_rate": 9.442623344142534e-06,
"loss": 0.6055,
"step": 2797
},
{
"epoch": 0.18,
"grad_norm": 0.9127347469329834,
"learning_rate": 9.442152495113808e-06,
"loss": 0.6153,
"step": 2798
},
{
"epoch": 0.18,
"grad_norm": 0.8872652053833008,
"learning_rate": 9.441681459041494e-06,
"loss": 0.6426,
"step": 2799
},
{
"epoch": 0.18,
"grad_norm": 0.9660980105400085,
"learning_rate": 9.441210235945425e-06,
"loss": 0.6255,
"step": 2800
},
{
"epoch": 0.18,
"grad_norm": 0.8567848801612854,
"learning_rate": 9.440738825845441e-06,
"loss": 0.6009,
"step": 2801
},
{
"epoch": 0.18,
"grad_norm": 0.9663728475570679,
"learning_rate": 9.440267228761395e-06,
"loss": 0.6588,
"step": 2802
},
{
"epoch": 0.18,
"grad_norm": 0.9529426097869873,
"learning_rate": 9.439795444713143e-06,
"loss": 0.6628,
"step": 2803
},
{
"epoch": 0.18,
"grad_norm": 0.929195761680603,
"learning_rate": 9.43932347372055e-06,
"loss": 0.6209,
"step": 2804
},
{
"epoch": 0.18,
"grad_norm": 0.9078366160392761,
"learning_rate": 9.438851315803488e-06,
"loss": 0.5669,
"step": 2805
},
{
"epoch": 0.18,
"grad_norm": 0.9016088247299194,
"learning_rate": 9.438378970981839e-06,
"loss": 0.6074,
"step": 2806
},
{
"epoch": 0.18,
"grad_norm": 0.9534980654716492,
"learning_rate": 9.43790643927549e-06,
"loss": 0.7098,
"step": 2807
},
{
"epoch": 0.18,
"grad_norm": 0.8913077116012573,
"learning_rate": 9.437433720704342e-06,
"loss": 0.586,
"step": 2808
},
{
"epoch": 0.18,
"grad_norm": 1.0161441564559937,
"learning_rate": 9.436960815288294e-06,
"loss": 0.6038,
"step": 2809
},
{
"epoch": 0.18,
"grad_norm": 0.8946830034255981,
"learning_rate": 9.436487723047263e-06,
"loss": 0.6169,
"step": 2810
},
{
"epoch": 0.18,
"grad_norm": 0.9344162344932556,
"learning_rate": 9.436014444001167e-06,
"loss": 0.6332,
"step": 2811
},
{
"epoch": 0.18,
"grad_norm": 0.8833682537078857,
"learning_rate": 9.435540978169933e-06,
"loss": 0.6148,
"step": 2812
},
{
"epoch": 0.18,
"grad_norm": 0.9014259576797485,
"learning_rate": 9.435067325573499e-06,
"loss": 0.6617,
"step": 2813
},
{
"epoch": 0.18,
"grad_norm": 0.8786671757698059,
"learning_rate": 9.43459348623181e-06,
"loss": 0.6741,
"step": 2814
},
{
"epoch": 0.18,
"grad_norm": 0.9095485806465149,
"learning_rate": 9.434119460164816e-06,
"loss": 0.5859,
"step": 2815
},
{
"epoch": 0.18,
"grad_norm": 0.9492687582969666,
"learning_rate": 9.433645247392476e-06,
"loss": 0.6005,
"step": 2816
},
{
"epoch": 0.18,
"grad_norm": 0.9836667776107788,
"learning_rate": 9.433170847934759e-06,
"loss": 0.673,
"step": 2817
},
{
"epoch": 0.18,
"grad_norm": 0.9654482007026672,
"learning_rate": 9.432696261811637e-06,
"loss": 0.6462,
"step": 2818
},
{
"epoch": 0.18,
"grad_norm": 0.919657826423645,
"learning_rate": 9.432221489043097e-06,
"loss": 0.6495,
"step": 2819
},
{
"epoch": 0.18,
"grad_norm": 0.928325355052948,
"learning_rate": 9.43174652964913e-06,
"loss": 0.6354,
"step": 2820
},
{
"epoch": 0.18,
"grad_norm": 1.0097019672393799,
"learning_rate": 9.431271383649731e-06,
"loss": 0.636,
"step": 2821
},
{
"epoch": 0.18,
"grad_norm": 0.8387419581413269,
"learning_rate": 9.430796051064913e-06,
"loss": 0.6435,
"step": 2822
},
{
"epoch": 0.18,
"grad_norm": 0.9152708649635315,
"learning_rate": 9.430320531914683e-06,
"loss": 0.6436,
"step": 2823
},
{
"epoch": 0.18,
"grad_norm": 0.9267799854278564,
"learning_rate": 9.42984482621907e-06,
"loss": 0.6528,
"step": 2824
},
{
"epoch": 0.18,
"grad_norm": 0.8546323776245117,
"learning_rate": 9.4293689339981e-06,
"loss": 0.5591,
"step": 2825
},
{
"epoch": 0.18,
"grad_norm": 1.015834093093872,
"learning_rate": 9.428892855271813e-06,
"loss": 0.7004,
"step": 2826
},
{
"epoch": 0.18,
"grad_norm": 0.9022856950759888,
"learning_rate": 9.428416590060256e-06,
"loss": 0.6214,
"step": 2827
},
{
"epoch": 0.18,
"grad_norm": 0.9249994158744812,
"learning_rate": 9.427940138383482e-06,
"loss": 0.6688,
"step": 2828
},
{
"epoch": 0.18,
"grad_norm": 0.8863480091094971,
"learning_rate": 9.427463500261551e-06,
"loss": 0.6651,
"step": 2829
},
{
"epoch": 0.18,
"grad_norm": 0.8578901290893555,
"learning_rate": 9.426986675714535e-06,
"loss": 0.5767,
"step": 2830
},
{
"epoch": 0.18,
"grad_norm": 0.8513709902763367,
"learning_rate": 9.426509664762509e-06,
"loss": 0.545,
"step": 2831
},
{
"epoch": 0.18,
"grad_norm": 0.9681910872459412,
"learning_rate": 9.42603246742556e-06,
"loss": 0.6421,
"step": 2832
},
{
"epoch": 0.18,
"grad_norm": 0.9950567483901978,
"learning_rate": 9.425555083723783e-06,
"loss": 0.6663,
"step": 2833
},
{
"epoch": 0.18,
"grad_norm": 0.9001085162162781,
"learning_rate": 9.425077513677276e-06,
"loss": 0.61,
"step": 2834
},
{
"epoch": 0.18,
"grad_norm": 0.9015680551528931,
"learning_rate": 9.424599757306148e-06,
"loss": 0.6296,
"step": 2835
},
{
"epoch": 0.18,
"grad_norm": 0.862308144569397,
"learning_rate": 9.424121814630516e-06,
"loss": 0.5494,
"step": 2836
},
{
"epoch": 0.18,
"grad_norm": 0.913428008556366,
"learning_rate": 9.423643685670504e-06,
"loss": 0.6652,
"step": 2837
},
{
"epoch": 0.18,
"grad_norm": 0.8796103000640869,
"learning_rate": 9.423165370446249e-06,
"loss": 0.5867,
"step": 2838
},
{
"epoch": 0.18,
"grad_norm": 0.9445327520370483,
"learning_rate": 9.422686868977884e-06,
"loss": 0.5812,
"step": 2839
},
{
"epoch": 0.18,
"grad_norm": 1.0006681680679321,
"learning_rate": 9.42220818128556e-06,
"loss": 0.6484,
"step": 2840
},
{
"epoch": 0.18,
"grad_norm": 0.9889962077140808,
"learning_rate": 9.421729307389435e-06,
"loss": 0.6266,
"step": 2841
},
{
"epoch": 0.18,
"grad_norm": 0.8913476467132568,
"learning_rate": 9.42125024730967e-06,
"loss": 0.6197,
"step": 2842
},
{
"epoch": 0.18,
"grad_norm": 1.0092391967773438,
"learning_rate": 9.420771001066439e-06,
"loss": 0.6748,
"step": 2843
},
{
"epoch": 0.18,
"grad_norm": 0.9135981202125549,
"learning_rate": 9.420291568679917e-06,
"loss": 0.6796,
"step": 2844
},
{
"epoch": 0.18,
"grad_norm": 0.9135114550590515,
"learning_rate": 9.419811950170294e-06,
"loss": 0.6444,
"step": 2845
},
{
"epoch": 0.18,
"grad_norm": 0.9234583377838135,
"learning_rate": 9.419332145557768e-06,
"loss": 0.652,
"step": 2846
},
{
"epoch": 0.18,
"grad_norm": 1.013744831085205,
"learning_rate": 9.418852154862538e-06,
"loss": 0.6552,
"step": 2847
},
{
"epoch": 0.18,
"grad_norm": 0.8808279633522034,
"learning_rate": 9.418371978104816e-06,
"loss": 0.6126,
"step": 2848
},
{
"epoch": 0.18,
"grad_norm": 0.9165722131729126,
"learning_rate": 9.41789161530482e-06,
"loss": 0.6558,
"step": 2849
},
{
"epoch": 0.18,
"grad_norm": 0.9362298250198364,
"learning_rate": 9.417411066482777e-06,
"loss": 0.6204,
"step": 2850
},
{
"epoch": 0.18,
"grad_norm": 0.9138143658638,
"learning_rate": 9.41693033165892e-06,
"loss": 0.6359,
"step": 2851
},
{
"epoch": 0.18,
"grad_norm": 0.8916357755661011,
"learning_rate": 9.416449410853495e-06,
"loss": 0.6234,
"step": 2852
},
{
"epoch": 0.18,
"grad_norm": 1.0022516250610352,
"learning_rate": 9.415968304086746e-06,
"loss": 0.6353,
"step": 2853
},
{
"epoch": 0.18,
"grad_norm": 0.8648804426193237,
"learning_rate": 9.415487011378935e-06,
"loss": 0.6154,
"step": 2854
},
{
"epoch": 0.18,
"grad_norm": 0.9364731311798096,
"learning_rate": 9.415005532750326e-06,
"loss": 0.5895,
"step": 2855
},
{
"epoch": 0.18,
"grad_norm": 0.961506187915802,
"learning_rate": 9.414523868221192e-06,
"loss": 0.6945,
"step": 2856
},
{
"epoch": 0.18,
"grad_norm": 0.8515611886978149,
"learning_rate": 9.414042017811817e-06,
"loss": 0.6253,
"step": 2857
},
{
"epoch": 0.18,
"grad_norm": 0.8460178375244141,
"learning_rate": 9.413559981542486e-06,
"loss": 0.6468,
"step": 2858
},
{
"epoch": 0.18,
"grad_norm": 0.9305799603462219,
"learning_rate": 9.413077759433498e-06,
"loss": 0.644,
"step": 2859
},
{
"epoch": 0.18,
"grad_norm": 0.9662857055664062,
"learning_rate": 9.412595351505158e-06,
"loss": 0.6078,
"step": 2860
},
{
"epoch": 0.18,
"grad_norm": 0.8659998178482056,
"learning_rate": 9.412112757777777e-06,
"loss": 0.6536,
"step": 2861
},
{
"epoch": 0.18,
"grad_norm": 0.9392401576042175,
"learning_rate": 9.411629978271679e-06,
"loss": 0.6528,
"step": 2862
},
{
"epoch": 0.18,
"grad_norm": 0.9797030687332153,
"learning_rate": 9.411147013007188e-06,
"loss": 0.6421,
"step": 2863
},
{
"epoch": 0.18,
"grad_norm": 0.9174354672431946,
"learning_rate": 9.41066386200464e-06,
"loss": 0.6395,
"step": 2864
},
{
"epoch": 0.18,
"grad_norm": 0.8441389203071594,
"learning_rate": 9.410180525284384e-06,
"loss": 0.5613,
"step": 2865
},
{
"epoch": 0.18,
"grad_norm": 0.8536418080329895,
"learning_rate": 9.409697002866765e-06,
"loss": 0.5965,
"step": 2866
},
{
"epoch": 0.18,
"grad_norm": 0.9425634145736694,
"learning_rate": 9.409213294772147e-06,
"loss": 0.6575,
"step": 2867
},
{
"epoch": 0.18,
"grad_norm": 0.9369651079177856,
"learning_rate": 9.408729401020896e-06,
"loss": 0.6457,
"step": 2868
},
{
"epoch": 0.18,
"grad_norm": 0.9151921272277832,
"learning_rate": 9.408245321633385e-06,
"loss": 0.6186,
"step": 2869
},
{
"epoch": 0.18,
"grad_norm": 0.8802269697189331,
"learning_rate": 9.407761056629999e-06,
"loss": 0.5875,
"step": 2870
},
{
"epoch": 0.18,
"grad_norm": 0.9625882506370544,
"learning_rate": 9.40727660603113e-06,
"loss": 0.6164,
"step": 2871
},
{
"epoch": 0.18,
"grad_norm": 1.045422911643982,
"learning_rate": 9.406791969857173e-06,
"loss": 0.6814,
"step": 2872
},
{
"epoch": 0.18,
"grad_norm": 0.9631166458129883,
"learning_rate": 9.406307148128537e-06,
"loss": 0.6423,
"step": 2873
},
{
"epoch": 0.18,
"grad_norm": 1.0093629360198975,
"learning_rate": 9.405822140865636e-06,
"loss": 0.6567,
"step": 2874
},
{
"epoch": 0.18,
"grad_norm": 0.8591984510421753,
"learning_rate": 9.40533694808889e-06,
"loss": 0.5637,
"step": 2875
},
{
"epoch": 0.18,
"grad_norm": 0.8643238544464111,
"learning_rate": 9.404851569818731e-06,
"loss": 0.6406,
"step": 2876
},
{
"epoch": 0.18,
"grad_norm": 1.0501185655593872,
"learning_rate": 9.404366006075596e-06,
"loss": 0.7109,
"step": 2877
},
{
"epoch": 0.18,
"grad_norm": 0.9728371500968933,
"learning_rate": 9.403880256879931e-06,
"loss": 0.6682,
"step": 2878
},
{
"epoch": 0.18,
"grad_norm": 0.9601030349731445,
"learning_rate": 9.403394322252186e-06,
"loss": 0.6103,
"step": 2879
},
{
"epoch": 0.18,
"grad_norm": 0.8728528618812561,
"learning_rate": 9.402908202212826e-06,
"loss": 0.5668,
"step": 2880
},
{
"epoch": 0.18,
"grad_norm": 0.9390819072723389,
"learning_rate": 9.402421896782319e-06,
"loss": 0.6126,
"step": 2881
},
{
"epoch": 0.18,
"grad_norm": 0.8935672044754028,
"learning_rate": 9.401935405981138e-06,
"loss": 0.6379,
"step": 2882
},
{
"epoch": 0.18,
"grad_norm": 0.8943515419960022,
"learning_rate": 9.401448729829773e-06,
"loss": 0.6343,
"step": 2883
},
{
"epoch": 0.18,
"grad_norm": 0.8738617300987244,
"learning_rate": 9.400961868348713e-06,
"loss": 0.6473,
"step": 2884
},
{
"epoch": 0.18,
"grad_norm": 0.8747914433479309,
"learning_rate": 9.400474821558457e-06,
"loss": 0.5848,
"step": 2885
},
{
"epoch": 0.18,
"grad_norm": 0.8880560994148254,
"learning_rate": 9.399987589479516e-06,
"loss": 0.6521,
"step": 2886
},
{
"epoch": 0.18,
"grad_norm": 0.9082310199737549,
"learning_rate": 9.399500172132403e-06,
"loss": 0.6059,
"step": 2887
},
{
"epoch": 0.18,
"grad_norm": 0.945617139339447,
"learning_rate": 9.399012569537643e-06,
"loss": 0.6031,
"step": 2888
},
{
"epoch": 0.18,
"grad_norm": 0.9733775854110718,
"learning_rate": 9.398524781715767e-06,
"loss": 0.6393,
"step": 2889
},
{
"epoch": 0.18,
"grad_norm": 0.9139953851699829,
"learning_rate": 9.398036808687314e-06,
"loss": 0.5984,
"step": 2890
},
{
"epoch": 0.18,
"grad_norm": 0.896920919418335,
"learning_rate": 9.39754865047283e-06,
"loss": 0.6455,
"step": 2891
},
{
"epoch": 0.18,
"grad_norm": 0.8665996193885803,
"learning_rate": 9.39706030709287e-06,
"loss": 0.5565,
"step": 2892
},
{
"epoch": 0.18,
"grad_norm": 0.9414594173431396,
"learning_rate": 9.396571778567997e-06,
"loss": 0.6157,
"step": 2893
},
{
"epoch": 0.18,
"grad_norm": 0.9462769031524658,
"learning_rate": 9.396083064918782e-06,
"loss": 0.6198,
"step": 2894
},
{
"epoch": 0.18,
"grad_norm": 0.9393496513366699,
"learning_rate": 9.3955941661658e-06,
"loss": 0.6631,
"step": 2895
},
{
"epoch": 0.18,
"grad_norm": 0.8664448857307434,
"learning_rate": 9.39510508232964e-06,
"loss": 0.5953,
"step": 2896
},
{
"epoch": 0.18,
"grad_norm": 0.8992229700088501,
"learning_rate": 9.394615813430895e-06,
"loss": 0.604,
"step": 2897
},
{
"epoch": 0.18,
"grad_norm": 0.9086971282958984,
"learning_rate": 9.394126359490166e-06,
"loss": 0.6294,
"step": 2898
},
{
"epoch": 0.18,
"grad_norm": 0.9720740914344788,
"learning_rate": 9.393636720528061e-06,
"loss": 0.6259,
"step": 2899
},
{
"epoch": 0.18,
"grad_norm": 0.9365057349205017,
"learning_rate": 9.393146896565197e-06,
"loss": 0.6621,
"step": 2900
},
{
"epoch": 0.18,
"grad_norm": 1.003066897392273,
"learning_rate": 9.392656887622202e-06,
"loss": 0.639,
"step": 2901
},
{
"epoch": 0.18,
"grad_norm": 0.9764630794525146,
"learning_rate": 9.392166693719706e-06,
"loss": 0.6113,
"step": 2902
},
{
"epoch": 0.18,
"grad_norm": 0.901775598526001,
"learning_rate": 9.391676314878348e-06,
"loss": 0.6274,
"step": 2903
},
{
"epoch": 0.18,
"grad_norm": 0.8916495442390442,
"learning_rate": 9.391185751118782e-06,
"loss": 0.6762,
"step": 2904
},
{
"epoch": 0.18,
"grad_norm": 0.9183511137962341,
"learning_rate": 9.390695002461657e-06,
"loss": 0.6358,
"step": 2905
},
{
"epoch": 0.18,
"grad_norm": 0.8651520609855652,
"learning_rate": 9.390204068927638e-06,
"loss": 0.629,
"step": 2906
},
{
"epoch": 0.18,
"grad_norm": 0.9377386569976807,
"learning_rate": 9.389712950537399e-06,
"loss": 0.638,
"step": 2907
},
{
"epoch": 0.18,
"grad_norm": 0.9569635987281799,
"learning_rate": 9.38922164731162e-06,
"loss": 0.6532,
"step": 2908
},
{
"epoch": 0.18,
"grad_norm": 0.8829088807106018,
"learning_rate": 9.388730159270984e-06,
"loss": 0.524,
"step": 2909
},
{
"epoch": 0.18,
"grad_norm": 0.8493028879165649,
"learning_rate": 9.38823848643619e-06,
"loss": 0.6161,
"step": 2910
},
{
"epoch": 0.18,
"grad_norm": 0.9601280689239502,
"learning_rate": 9.38774662882794e-06,
"loss": 0.7469,
"step": 2911
},
{
"epoch": 0.18,
"grad_norm": 0.9152663946151733,
"learning_rate": 9.387254586466942e-06,
"loss": 0.6003,
"step": 2912
},
{
"epoch": 0.18,
"grad_norm": 0.8507892489433289,
"learning_rate": 9.386762359373915e-06,
"loss": 0.6036,
"step": 2913
},
{
"epoch": 0.18,
"grad_norm": 0.8200054168701172,
"learning_rate": 9.386269947569585e-06,
"loss": 0.5787,
"step": 2914
},
{
"epoch": 0.18,
"grad_norm": 0.9165661334991455,
"learning_rate": 9.385777351074688e-06,
"loss": 0.6487,
"step": 2915
},
{
"epoch": 0.18,
"grad_norm": 0.9168198108673096,
"learning_rate": 9.385284569909963e-06,
"loss": 0.628,
"step": 2916
},
{
"epoch": 0.18,
"grad_norm": 0.8360828161239624,
"learning_rate": 9.38479160409616e-06,
"loss": 0.6177,
"step": 2917
},
{
"epoch": 0.18,
"grad_norm": 0.9536049962043762,
"learning_rate": 9.384298453654037e-06,
"loss": 0.6575,
"step": 2918
},
{
"epoch": 0.18,
"grad_norm": 0.8601536154747009,
"learning_rate": 9.383805118604357e-06,
"loss": 0.6362,
"step": 2919
},
{
"epoch": 0.18,
"grad_norm": 0.90377277135849,
"learning_rate": 9.383311598967892e-06,
"loss": 0.6232,
"step": 2920
},
{
"epoch": 0.19,
"grad_norm": 0.9626878499984741,
"learning_rate": 9.382817894765426e-06,
"loss": 0.7203,
"step": 2921
},
{
"epoch": 0.19,
"grad_norm": 0.9490284323692322,
"learning_rate": 9.382324006017746e-06,
"loss": 0.6665,
"step": 2922
},
{
"epoch": 0.19,
"grad_norm": 0.9522665739059448,
"learning_rate": 9.381829932745646e-06,
"loss": 0.5931,
"step": 2923
},
{
"epoch": 0.19,
"grad_norm": 0.9432849884033203,
"learning_rate": 9.38133567496993e-06,
"loss": 0.6349,
"step": 2924
},
{
"epoch": 0.19,
"grad_norm": 0.9463351368904114,
"learning_rate": 9.380841232711412e-06,
"loss": 0.6388,
"step": 2925
},
{
"epoch": 0.19,
"grad_norm": 0.9243980646133423,
"learning_rate": 9.38034660599091e-06,
"loss": 0.5969,
"step": 2926
},
{
"epoch": 0.19,
"grad_norm": 0.8849940299987793,
"learning_rate": 9.379851794829247e-06,
"loss": 0.59,
"step": 2927
},
{
"epoch": 0.19,
"grad_norm": 0.8930582404136658,
"learning_rate": 9.379356799247263e-06,
"loss": 0.6975,
"step": 2928
},
{
"epoch": 0.19,
"grad_norm": 0.8990230560302734,
"learning_rate": 9.3788616192658e-06,
"loss": 0.6572,
"step": 2929
},
{
"epoch": 0.19,
"grad_norm": 0.9136034250259399,
"learning_rate": 9.378366254905706e-06,
"loss": 0.6293,
"step": 2930
},
{
"epoch": 0.19,
"grad_norm": 0.8949812054634094,
"learning_rate": 9.377870706187841e-06,
"loss": 0.5834,
"step": 2931
},
{
"epoch": 0.19,
"grad_norm": 0.9137758016586304,
"learning_rate": 9.37737497313307e-06,
"loss": 0.5751,
"step": 2932
},
{
"epoch": 0.19,
"grad_norm": 0.8495166301727295,
"learning_rate": 9.376879055762267e-06,
"loss": 0.5503,
"step": 2933
},
{
"epoch": 0.19,
"grad_norm": 0.900567889213562,
"learning_rate": 9.376382954096312e-06,
"loss": 0.6444,
"step": 2934
},
{
"epoch": 0.19,
"grad_norm": 0.8367151021957397,
"learning_rate": 9.375886668156095e-06,
"loss": 0.6396,
"step": 2935
},
{
"epoch": 0.19,
"grad_norm": 0.9150660634040833,
"learning_rate": 9.375390197962514e-06,
"loss": 0.5965,
"step": 2936
},
{
"epoch": 0.19,
"grad_norm": 0.9402359127998352,
"learning_rate": 9.374893543536471e-06,
"loss": 0.5989,
"step": 2937
},
{
"epoch": 0.19,
"grad_norm": 0.9302038550376892,
"learning_rate": 9.374396704898883e-06,
"loss": 0.6621,
"step": 2938
},
{
"epoch": 0.19,
"grad_norm": 0.9301861524581909,
"learning_rate": 9.373899682070664e-06,
"loss": 0.6565,
"step": 2939
},
{
"epoch": 0.19,
"grad_norm": 0.9140589237213135,
"learning_rate": 9.373402475072746e-06,
"loss": 0.6323,
"step": 2940
},
{
"epoch": 0.19,
"grad_norm": 0.8676826357841492,
"learning_rate": 9.372905083926064e-06,
"loss": 0.6269,
"step": 2941
},
{
"epoch": 0.19,
"grad_norm": 0.8796793818473816,
"learning_rate": 9.372407508651561e-06,
"loss": 0.6185,
"step": 2942
},
{
"epoch": 0.19,
"grad_norm": 0.8520810604095459,
"learning_rate": 9.371909749270189e-06,
"loss": 0.5918,
"step": 2943
},
{
"epoch": 0.19,
"grad_norm": 0.8967190384864807,
"learning_rate": 9.371411805802907e-06,
"loss": 0.6327,
"step": 2944
},
{
"epoch": 0.19,
"grad_norm": 0.9223288893699646,
"learning_rate": 9.370913678270678e-06,
"loss": 0.5942,
"step": 2945
},
{
"epoch": 0.19,
"grad_norm": 0.8543033599853516,
"learning_rate": 9.370415366694483e-06,
"loss": 0.6218,
"step": 2946
},
{
"epoch": 0.19,
"grad_norm": 0.9239391684532166,
"learning_rate": 9.369916871095299e-06,
"loss": 0.6351,
"step": 2947
},
{
"epoch": 0.19,
"grad_norm": 0.8336774110794067,
"learning_rate": 9.369418191494117e-06,
"loss": 0.6284,
"step": 2948
},
{
"epoch": 0.19,
"grad_norm": 0.9170488119125366,
"learning_rate": 9.368919327911934e-06,
"loss": 0.6324,
"step": 2949
},
{
"epoch": 0.19,
"grad_norm": 0.9082837104797363,
"learning_rate": 9.368420280369759e-06,
"loss": 0.6565,
"step": 2950
},
{
"epoch": 0.19,
"grad_norm": 0.8999912738800049,
"learning_rate": 9.3679210488886e-06,
"loss": 0.6246,
"step": 2951
},
{
"epoch": 0.19,
"grad_norm": 0.8772600293159485,
"learning_rate": 9.367421633489482e-06,
"loss": 0.6999,
"step": 2952
},
{
"epoch": 0.19,
"grad_norm": 0.8400874733924866,
"learning_rate": 9.366922034193431e-06,
"loss": 0.6037,
"step": 2953
},
{
"epoch": 0.19,
"grad_norm": 0.9329245090484619,
"learning_rate": 9.366422251021486e-06,
"loss": 0.6171,
"step": 2954
},
{
"epoch": 0.19,
"grad_norm": 0.8737487196922302,
"learning_rate": 9.365922283994689e-06,
"loss": 0.6164,
"step": 2955
},
{
"epoch": 0.19,
"grad_norm": 0.9520554542541504,
"learning_rate": 9.365422133134093e-06,
"loss": 0.6257,
"step": 2956
},
{
"epoch": 0.19,
"grad_norm": 0.8844176530838013,
"learning_rate": 9.364921798460756e-06,
"loss": 0.6177,
"step": 2957
},
{
"epoch": 0.19,
"grad_norm": 0.9074828028678894,
"learning_rate": 9.364421279995747e-06,
"loss": 0.6601,
"step": 2958
},
{
"epoch": 0.19,
"grad_norm": 0.8782038688659668,
"learning_rate": 9.36392057776014e-06,
"loss": 0.6358,
"step": 2959
},
{
"epoch": 0.19,
"grad_norm": 0.9199265241622925,
"learning_rate": 9.36341969177502e-06,
"loss": 0.5666,
"step": 2960
},
{
"epoch": 0.19,
"grad_norm": 0.887235164642334,
"learning_rate": 9.362918622061475e-06,
"loss": 0.6011,
"step": 2961
},
{
"epoch": 0.19,
"grad_norm": 0.882902204990387,
"learning_rate": 9.362417368640604e-06,
"loss": 0.5865,
"step": 2962
},
{
"epoch": 0.19,
"grad_norm": 0.8558497428894043,
"learning_rate": 9.361915931533513e-06,
"loss": 0.6236,
"step": 2963
},
{
"epoch": 0.19,
"grad_norm": 0.879278838634491,
"learning_rate": 9.36141431076132e-06,
"loss": 0.6493,
"step": 2964
},
{
"epoch": 0.19,
"grad_norm": 0.929413378238678,
"learning_rate": 9.360912506345139e-06,
"loss": 0.6075,
"step": 2965
},
{
"epoch": 0.19,
"grad_norm": 0.8678581118583679,
"learning_rate": 9.360410518306103e-06,
"loss": 0.5635,
"step": 2966
},
{
"epoch": 0.19,
"grad_norm": 0.8952652812004089,
"learning_rate": 9.359908346665349e-06,
"loss": 0.5775,
"step": 2967
},
{
"epoch": 0.19,
"grad_norm": 0.9281927943229675,
"learning_rate": 9.359405991444022e-06,
"loss": 0.6704,
"step": 2968
},
{
"epoch": 0.19,
"grad_norm": 0.958543598651886,
"learning_rate": 9.358903452663273e-06,
"loss": 0.6666,
"step": 2969
},
{
"epoch": 0.19,
"grad_norm": 0.8834668397903442,
"learning_rate": 9.358400730344265e-06,
"loss": 0.6254,
"step": 2970
},
{
"epoch": 0.19,
"grad_norm": 0.8528700470924377,
"learning_rate": 9.357897824508163e-06,
"loss": 0.5954,
"step": 2971
},
{
"epoch": 0.19,
"grad_norm": 0.9526364207267761,
"learning_rate": 9.357394735176144e-06,
"loss": 0.6404,
"step": 2972
},
{
"epoch": 0.19,
"grad_norm": 0.8991536498069763,
"learning_rate": 9.356891462369391e-06,
"loss": 0.6091,
"step": 2973
},
{
"epoch": 0.19,
"grad_norm": 0.916277289390564,
"learning_rate": 9.356388006109094e-06,
"loss": 0.6429,
"step": 2974
},
{
"epoch": 0.19,
"grad_norm": 0.8340256214141846,
"learning_rate": 9.355884366416454e-06,
"loss": 0.6042,
"step": 2975
},
{
"epoch": 0.19,
"grad_norm": 0.9151265621185303,
"learning_rate": 9.355380543312676e-06,
"loss": 0.6805,
"step": 2976
},
{
"epoch": 0.19,
"grad_norm": 0.9069379568099976,
"learning_rate": 9.354876536818974e-06,
"loss": 0.6437,
"step": 2977
},
{
"epoch": 0.19,
"grad_norm": 0.8915377259254456,
"learning_rate": 9.35437234695657e-06,
"loss": 0.6179,
"step": 2978
},
{
"epoch": 0.19,
"grad_norm": 0.9558995366096497,
"learning_rate": 9.353867973746696e-06,
"loss": 0.6258,
"step": 2979
},
{
"epoch": 0.19,
"grad_norm": 0.8677237629890442,
"learning_rate": 9.353363417210587e-06,
"loss": 0.5676,
"step": 2980
},
{
"epoch": 0.19,
"grad_norm": 0.9419227838516235,
"learning_rate": 9.352858677369488e-06,
"loss": 0.655,
"step": 2981
},
{
"epoch": 0.19,
"grad_norm": 0.8914104700088501,
"learning_rate": 9.352353754244654e-06,
"loss": 0.5995,
"step": 2982
},
{
"epoch": 0.19,
"grad_norm": 0.8787503838539124,
"learning_rate": 9.351848647857343e-06,
"loss": 0.6032,
"step": 2983
},
{
"epoch": 0.19,
"grad_norm": 0.856158971786499,
"learning_rate": 9.351343358228825e-06,
"loss": 0.5997,
"step": 2984
},
{
"epoch": 0.19,
"grad_norm": 0.8458609580993652,
"learning_rate": 9.350837885380375e-06,
"loss": 0.6437,
"step": 2985
},
{
"epoch": 0.19,
"grad_norm": 0.9018574953079224,
"learning_rate": 9.350332229333276e-06,
"loss": 0.607,
"step": 2986
},
{
"epoch": 0.19,
"grad_norm": 0.8946293592453003,
"learning_rate": 9.349826390108823e-06,
"loss": 0.6058,
"step": 2987
},
{
"epoch": 0.19,
"grad_norm": 0.8331573605537415,
"learning_rate": 9.349320367728312e-06,
"loss": 0.5638,
"step": 2988
},
{
"epoch": 0.19,
"grad_norm": 0.8785397410392761,
"learning_rate": 9.34881416221305e-06,
"loss": 0.6192,
"step": 2989
},
{
"epoch": 0.19,
"grad_norm": 0.9098055958747864,
"learning_rate": 9.348307773584351e-06,
"loss": 0.6112,
"step": 2990
},
{
"epoch": 0.19,
"grad_norm": 0.9168455600738525,
"learning_rate": 9.34780120186354e-06,
"loss": 0.6383,
"step": 2991
},
{
"epoch": 0.19,
"grad_norm": 0.9003625512123108,
"learning_rate": 9.347294447071945e-06,
"loss": 0.6049,
"step": 2992
},
{
"epoch": 0.19,
"grad_norm": 0.9203317165374756,
"learning_rate": 9.346787509230903e-06,
"loss": 0.6291,
"step": 2993
},
{
"epoch": 0.19,
"grad_norm": 0.8706764578819275,
"learning_rate": 9.346280388361761e-06,
"loss": 0.6065,
"step": 2994
},
{
"epoch": 0.19,
"grad_norm": 0.8694605827331543,
"learning_rate": 9.34577308448587e-06,
"loss": 0.6128,
"step": 2995
},
{
"epoch": 0.19,
"grad_norm": 0.8985933065414429,
"learning_rate": 9.345265597624595e-06,
"loss": 0.6279,
"step": 2996
},
{
"epoch": 0.19,
"grad_norm": 0.9904145002365112,
"learning_rate": 9.344757927799299e-06,
"loss": 0.6427,
"step": 2997
},
{
"epoch": 0.19,
"grad_norm": 0.943004846572876,
"learning_rate": 9.344250075031362e-06,
"loss": 0.6155,
"step": 2998
},
{
"epoch": 0.19,
"grad_norm": 0.8913044929504395,
"learning_rate": 9.343742039342168e-06,
"loss": 0.5856,
"step": 2999
},
{
"epoch": 0.19,
"grad_norm": 0.8846127986907959,
"learning_rate": 9.343233820753107e-06,
"loss": 0.6393,
"step": 3000
},
{
"epoch": 0.19,
"grad_norm": 0.8753595948219299,
"learning_rate": 9.34272541928558e-06,
"loss": 0.6275,
"step": 3001
},
{
"epoch": 0.19,
"grad_norm": 0.9006514549255371,
"learning_rate": 9.34221683496099e-06,
"loss": 0.5802,
"step": 3002
},
{
"epoch": 0.19,
"grad_norm": 0.9199402332305908,
"learning_rate": 9.341708067800757e-06,
"loss": 0.5987,
"step": 3003
},
{
"epoch": 0.19,
"grad_norm": 0.9146287441253662,
"learning_rate": 9.341199117826298e-06,
"loss": 0.6328,
"step": 3004
},
{
"epoch": 0.19,
"grad_norm": 0.8784115314483643,
"learning_rate": 9.340689985059048e-06,
"loss": 0.6419,
"step": 3005
},
{
"epoch": 0.19,
"grad_norm": 0.8956212997436523,
"learning_rate": 9.340180669520443e-06,
"loss": 0.6327,
"step": 3006
},
{
"epoch": 0.19,
"grad_norm": 0.9466882944107056,
"learning_rate": 9.339671171231929e-06,
"loss": 0.6233,
"step": 3007
},
{
"epoch": 0.19,
"grad_norm": 0.8755168318748474,
"learning_rate": 9.339161490214957e-06,
"loss": 0.6351,
"step": 3008
},
{
"epoch": 0.19,
"grad_norm": 0.8940410614013672,
"learning_rate": 9.33865162649099e-06,
"loss": 0.6076,
"step": 3009
},
{
"epoch": 0.19,
"grad_norm": 0.833020031452179,
"learning_rate": 9.338141580081496e-06,
"loss": 0.5649,
"step": 3010
},
{
"epoch": 0.19,
"grad_norm": 0.950567364692688,
"learning_rate": 9.337631351007953e-06,
"loss": 0.5768,
"step": 3011
},
{
"epoch": 0.19,
"grad_norm": 0.9161326289176941,
"learning_rate": 9.337120939291842e-06,
"loss": 0.6158,
"step": 3012
},
{
"epoch": 0.19,
"grad_norm": 0.9443663954734802,
"learning_rate": 9.336610344954656e-06,
"loss": 0.5929,
"step": 3013
},
{
"epoch": 0.19,
"grad_norm": 0.9030787348747253,
"learning_rate": 9.336099568017895e-06,
"loss": 0.583,
"step": 3014
},
{
"epoch": 0.19,
"grad_norm": 0.984470546245575,
"learning_rate": 9.335588608503065e-06,
"loss": 0.6265,
"step": 3015
},
{
"epoch": 0.19,
"grad_norm": 0.9294076561927795,
"learning_rate": 9.33507746643168e-06,
"loss": 0.6035,
"step": 3016
},
{
"epoch": 0.19,
"grad_norm": 0.8897981643676758,
"learning_rate": 9.334566141825266e-06,
"loss": 0.649,
"step": 3017
},
{
"epoch": 0.19,
"grad_norm": 0.8603422045707703,
"learning_rate": 9.334054634705347e-06,
"loss": 0.5839,
"step": 3018
},
{
"epoch": 0.19,
"grad_norm": 0.9341859817504883,
"learning_rate": 9.333542945093468e-06,
"loss": 0.6234,
"step": 3019
},
{
"epoch": 0.19,
"grad_norm": 0.885899007320404,
"learning_rate": 9.333031073011169e-06,
"loss": 0.6592,
"step": 3020
},
{
"epoch": 0.19,
"grad_norm": 0.9095667004585266,
"learning_rate": 9.332519018480005e-06,
"loss": 0.6809,
"step": 3021
},
{
"epoch": 0.19,
"grad_norm": 0.8997942805290222,
"learning_rate": 9.332006781521537e-06,
"loss": 0.5953,
"step": 3022
},
{
"epoch": 0.19,
"grad_norm": 0.8883410692214966,
"learning_rate": 9.331494362157335e-06,
"loss": 0.5812,
"step": 3023
},
{
"epoch": 0.19,
"grad_norm": 0.9656973481178284,
"learning_rate": 9.330981760408972e-06,
"loss": 0.6112,
"step": 3024
},
{
"epoch": 0.19,
"grad_norm": 0.862815260887146,
"learning_rate": 9.330468976298033e-06,
"loss": 0.6099,
"step": 3025
},
{
"epoch": 0.19,
"grad_norm": 0.9055874347686768,
"learning_rate": 9.329956009846111e-06,
"loss": 0.6427,
"step": 3026
},
{
"epoch": 0.19,
"grad_norm": 0.9218257665634155,
"learning_rate": 9.329442861074803e-06,
"loss": 0.6451,
"step": 3027
},
{
"epoch": 0.19,
"grad_norm": 0.8925780057907104,
"learning_rate": 9.328929530005717e-06,
"loss": 0.6358,
"step": 3028
},
{
"epoch": 0.19,
"grad_norm": 0.9461687803268433,
"learning_rate": 9.328416016660471e-06,
"loss": 0.6319,
"step": 3029
},
{
"epoch": 0.19,
"grad_norm": 0.8816470503807068,
"learning_rate": 9.327902321060681e-06,
"loss": 0.5662,
"step": 3030
},
{
"epoch": 0.19,
"grad_norm": 0.8994545936584473,
"learning_rate": 9.327388443227981e-06,
"loss": 0.6594,
"step": 3031
},
{
"epoch": 0.19,
"grad_norm": 0.9947099685668945,
"learning_rate": 9.326874383184006e-06,
"loss": 0.6412,
"step": 3032
},
{
"epoch": 0.19,
"grad_norm": 0.9207108020782471,
"learning_rate": 9.326360140950406e-06,
"loss": 0.6029,
"step": 3033
},
{
"epoch": 0.19,
"grad_norm": 0.9070324897766113,
"learning_rate": 9.325845716548827e-06,
"loss": 0.648,
"step": 3034
},
{
"epoch": 0.19,
"grad_norm": 0.959884762763977,
"learning_rate": 9.325331110000937e-06,
"loss": 0.6598,
"step": 3035
},
{
"epoch": 0.19,
"grad_norm": 0.9339284896850586,
"learning_rate": 9.324816321328398e-06,
"loss": 0.6646,
"step": 3036
},
{
"epoch": 0.19,
"grad_norm": 0.852982759475708,
"learning_rate": 9.324301350552889e-06,
"loss": 0.6305,
"step": 3037
},
{
"epoch": 0.19,
"grad_norm": 0.9116036891937256,
"learning_rate": 9.323786197696094e-06,
"loss": 0.6283,
"step": 3038
},
{
"epoch": 0.19,
"grad_norm": 0.9738210439682007,
"learning_rate": 9.323270862779704e-06,
"loss": 0.6546,
"step": 3039
},
{
"epoch": 0.19,
"grad_norm": 0.8786873817443848,
"learning_rate": 9.322755345825418e-06,
"loss": 0.5488,
"step": 3040
},
{
"epoch": 0.19,
"grad_norm": 0.891937255859375,
"learning_rate": 9.32223964685494e-06,
"loss": 0.6073,
"step": 3041
},
{
"epoch": 0.19,
"grad_norm": 0.9519621133804321,
"learning_rate": 9.321723765889987e-06,
"loss": 0.6103,
"step": 3042
},
{
"epoch": 0.19,
"grad_norm": 0.9369633197784424,
"learning_rate": 9.321207702952281e-06,
"loss": 0.6551,
"step": 3043
},
{
"epoch": 0.19,
"grad_norm": 1.0724352598190308,
"learning_rate": 9.320691458063552e-06,
"loss": 0.644,
"step": 3044
},
{
"epoch": 0.19,
"grad_norm": 0.8826418519020081,
"learning_rate": 9.320175031245535e-06,
"loss": 0.6231,
"step": 3045
},
{
"epoch": 0.19,
"grad_norm": 0.9791775345802307,
"learning_rate": 9.319658422519977e-06,
"loss": 0.6646,
"step": 3046
},
{
"epoch": 0.19,
"grad_norm": 1.0244020223617554,
"learning_rate": 9.319141631908628e-06,
"loss": 0.6662,
"step": 3047
},
{
"epoch": 0.19,
"grad_norm": 0.8816352486610413,
"learning_rate": 9.318624659433254e-06,
"loss": 0.6291,
"step": 3048
},
{
"epoch": 0.19,
"grad_norm": 0.9350719451904297,
"learning_rate": 9.318107505115615e-06,
"loss": 0.6312,
"step": 3049
},
{
"epoch": 0.19,
"grad_norm": 0.8697081208229065,
"learning_rate": 9.317590168977492e-06,
"loss": 0.6504,
"step": 3050
},
{
"epoch": 0.19,
"grad_norm": 0.9508548378944397,
"learning_rate": 9.317072651040666e-06,
"loss": 0.6127,
"step": 3051
},
{
"epoch": 0.19,
"grad_norm": 0.9957895278930664,
"learning_rate": 9.31655495132693e-06,
"loss": 0.6096,
"step": 3052
},
{
"epoch": 0.19,
"grad_norm": 0.906047523021698,
"learning_rate": 9.31603706985808e-06,
"loss": 0.6054,
"step": 3053
},
{
"epoch": 0.19,
"grad_norm": 0.9574893116950989,
"learning_rate": 9.315519006655925e-06,
"loss": 0.6771,
"step": 3054
},
{
"epoch": 0.19,
"grad_norm": 0.8845919966697693,
"learning_rate": 9.315000761742276e-06,
"loss": 0.6098,
"step": 3055
},
{
"epoch": 0.19,
"grad_norm": 0.9109580516815186,
"learning_rate": 9.314482335138954e-06,
"loss": 0.6058,
"step": 3056
},
{
"epoch": 0.19,
"grad_norm": 0.8939434289932251,
"learning_rate": 9.313963726867793e-06,
"loss": 0.6442,
"step": 3057
},
{
"epoch": 0.19,
"grad_norm": 0.9312341213226318,
"learning_rate": 9.313444936950626e-06,
"loss": 0.5768,
"step": 3058
},
{
"epoch": 0.19,
"grad_norm": 0.9663771986961365,
"learning_rate": 9.312925965409297e-06,
"loss": 0.6526,
"step": 3059
},
{
"epoch": 0.19,
"grad_norm": 1.0164662599563599,
"learning_rate": 9.312406812265659e-06,
"loss": 0.6253,
"step": 3060
},
{
"epoch": 0.19,
"grad_norm": 0.8911099433898926,
"learning_rate": 9.311887477541574e-06,
"loss": 0.6088,
"step": 3061
},
{
"epoch": 0.19,
"grad_norm": 0.8796306848526001,
"learning_rate": 9.311367961258906e-06,
"loss": 0.6459,
"step": 3062
},
{
"epoch": 0.19,
"grad_norm": 0.8623282313346863,
"learning_rate": 9.31084826343953e-06,
"loss": 0.6009,
"step": 3063
},
{
"epoch": 0.19,
"grad_norm": 0.9488338828086853,
"learning_rate": 9.310328384105331e-06,
"loss": 0.6573,
"step": 3064
},
{
"epoch": 0.19,
"grad_norm": 0.9341808557510376,
"learning_rate": 9.309808323278199e-06,
"loss": 0.5932,
"step": 3065
},
{
"epoch": 0.19,
"grad_norm": 0.8781132698059082,
"learning_rate": 9.30928808098003e-06,
"loss": 0.6225,
"step": 3066
},
{
"epoch": 0.19,
"grad_norm": 0.9114009737968445,
"learning_rate": 9.308767657232733e-06,
"loss": 0.6765,
"step": 3067
},
{
"epoch": 0.19,
"grad_norm": 0.9022600650787354,
"learning_rate": 9.308247052058217e-06,
"loss": 0.6255,
"step": 3068
},
{
"epoch": 0.19,
"grad_norm": 0.9390726089477539,
"learning_rate": 9.307726265478405e-06,
"loss": 0.6543,
"step": 3069
},
{
"epoch": 0.19,
"grad_norm": 0.8956183791160583,
"learning_rate": 9.307205297515225e-06,
"loss": 0.6966,
"step": 3070
},
{
"epoch": 0.19,
"grad_norm": 0.9602479338645935,
"learning_rate": 9.306684148190616e-06,
"loss": 0.6754,
"step": 3071
},
{
"epoch": 0.19,
"grad_norm": 0.885344922542572,
"learning_rate": 9.306162817526519e-06,
"loss": 0.6952,
"step": 3072
},
{
"epoch": 0.19,
"grad_norm": 0.8164680004119873,
"learning_rate": 9.305641305544884e-06,
"loss": 0.5571,
"step": 3073
},
{
"epoch": 0.19,
"grad_norm": 0.8812573552131653,
"learning_rate": 9.305119612267673e-06,
"loss": 0.6224,
"step": 3074
},
{
"epoch": 0.19,
"grad_norm": 0.9333205819129944,
"learning_rate": 9.30459773771685e-06,
"loss": 0.66,
"step": 3075
},
{
"epoch": 0.19,
"grad_norm": 0.9016597270965576,
"learning_rate": 9.304075681914392e-06,
"loss": 0.5679,
"step": 3076
},
{
"epoch": 0.19,
"grad_norm": 0.8855369687080383,
"learning_rate": 9.30355344488228e-06,
"loss": 0.5928,
"step": 3077
},
{
"epoch": 0.2,
"grad_norm": 0.885812520980835,
"learning_rate": 9.303031026642504e-06,
"loss": 0.5906,
"step": 3078
},
{
"epoch": 0.2,
"grad_norm": 0.8560954332351685,
"learning_rate": 9.302508427217059e-06,
"loss": 0.6125,
"step": 3079
},
{
"epoch": 0.2,
"grad_norm": 0.9503233432769775,
"learning_rate": 9.301985646627953e-06,
"loss": 0.6551,
"step": 3080
},
{
"epoch": 0.2,
"grad_norm": 0.8623626828193665,
"learning_rate": 9.301462684897195e-06,
"loss": 0.613,
"step": 3081
},
{
"epoch": 0.2,
"grad_norm": 0.9079574346542358,
"learning_rate": 9.300939542046808e-06,
"loss": 0.601,
"step": 3082
},
{
"epoch": 0.2,
"grad_norm": 0.950981616973877,
"learning_rate": 9.30041621809882e-06,
"loss": 0.6574,
"step": 3083
},
{
"epoch": 0.2,
"grad_norm": 0.9608682990074158,
"learning_rate": 9.299892713075263e-06,
"loss": 0.6698,
"step": 3084
},
{
"epoch": 0.2,
"grad_norm": 0.9275756478309631,
"learning_rate": 9.299369026998184e-06,
"loss": 0.6307,
"step": 3085
},
{
"epoch": 0.2,
"grad_norm": 0.9137438535690308,
"learning_rate": 9.298845159889632e-06,
"loss": 0.612,
"step": 3086
},
{
"epoch": 0.2,
"grad_norm": 0.9493110179901123,
"learning_rate": 9.298321111771664e-06,
"loss": 0.6554,
"step": 3087
},
{
"epoch": 0.2,
"grad_norm": 0.9397709369659424,
"learning_rate": 9.297796882666346e-06,
"loss": 0.6389,
"step": 3088
},
{
"epoch": 0.2,
"grad_norm": 0.9529610276222229,
"learning_rate": 9.297272472595753e-06,
"loss": 0.6638,
"step": 3089
},
{
"epoch": 0.2,
"grad_norm": 0.8715389370918274,
"learning_rate": 9.296747881581965e-06,
"loss": 0.6121,
"step": 3090
},
{
"epoch": 0.2,
"grad_norm": 0.8829297423362732,
"learning_rate": 9.29622310964707e-06,
"loss": 0.6215,
"step": 3091
},
{
"epoch": 0.2,
"grad_norm": 0.9472043514251709,
"learning_rate": 9.295698156813167e-06,
"loss": 0.6624,
"step": 3092
},
{
"epoch": 0.2,
"grad_norm": 1.0828962326049805,
"learning_rate": 9.295173023102358e-06,
"loss": 0.6576,
"step": 3093
},
{
"epoch": 0.2,
"grad_norm": 1.0033841133117676,
"learning_rate": 9.294647708536754e-06,
"loss": 0.6569,
"step": 3094
},
{
"epoch": 0.2,
"grad_norm": 0.9431530237197876,
"learning_rate": 9.294122213138475e-06,
"loss": 0.5798,
"step": 3095
},
{
"epoch": 0.2,
"grad_norm": 0.951475977897644,
"learning_rate": 9.29359653692965e-06,
"loss": 0.6196,
"step": 3096
},
{
"epoch": 0.2,
"grad_norm": 0.9203341007232666,
"learning_rate": 9.293070679932407e-06,
"loss": 0.6916,
"step": 3097
},
{
"epoch": 0.2,
"grad_norm": 0.9140121936798096,
"learning_rate": 9.292544642168896e-06,
"loss": 0.5716,
"step": 3098
},
{
"epoch": 0.2,
"grad_norm": 0.8884128928184509,
"learning_rate": 9.292018423661261e-06,
"loss": 0.6084,
"step": 3099
},
{
"epoch": 0.2,
"grad_norm": 0.9457247257232666,
"learning_rate": 9.291492024431661e-06,
"loss": 0.6486,
"step": 3100
},
{
"epoch": 0.2,
"grad_norm": 0.9811872839927673,
"learning_rate": 9.290965444502263e-06,
"loss": 0.6772,
"step": 3101
},
{
"epoch": 0.2,
"grad_norm": 0.8778002858161926,
"learning_rate": 9.290438683895236e-06,
"loss": 0.6258,
"step": 3102
},
{
"epoch": 0.2,
"grad_norm": 0.962437093257904,
"learning_rate": 9.28991174263276e-06,
"loss": 0.6443,
"step": 3103
},
{
"epoch": 0.2,
"grad_norm": 0.961173415184021,
"learning_rate": 9.289384620737025e-06,
"loss": 0.6779,
"step": 3104
},
{
"epoch": 0.2,
"grad_norm": 0.9411439299583435,
"learning_rate": 9.288857318230225e-06,
"loss": 0.6218,
"step": 3105
},
{
"epoch": 0.2,
"grad_norm": 0.9473131895065308,
"learning_rate": 9.288329835134563e-06,
"loss": 0.6684,
"step": 3106
},
{
"epoch": 0.2,
"grad_norm": 0.8709444999694824,
"learning_rate": 9.28780217147225e-06,
"loss": 0.5879,
"step": 3107
},
{
"epoch": 0.2,
"grad_norm": 0.902916431427002,
"learning_rate": 9.287274327265505e-06,
"loss": 0.64,
"step": 3108
},
{
"epoch": 0.2,
"grad_norm": 0.9012413024902344,
"learning_rate": 9.286746302536551e-06,
"loss": 0.6239,
"step": 3109
},
{
"epoch": 0.2,
"grad_norm": 0.9633440971374512,
"learning_rate": 9.286218097307623e-06,
"loss": 0.6198,
"step": 3110
},
{
"epoch": 0.2,
"grad_norm": 0.9164153337478638,
"learning_rate": 9.285689711600961e-06,
"loss": 0.6444,
"step": 3111
},
{
"epoch": 0.2,
"grad_norm": 0.9006356000900269,
"learning_rate": 9.285161145438815e-06,
"loss": 0.6407,
"step": 3112
},
{
"epoch": 0.2,
"grad_norm": 0.8857513666152954,
"learning_rate": 9.284632398843439e-06,
"loss": 0.5797,
"step": 3113
},
{
"epoch": 0.2,
"grad_norm": 0.9555025696754456,
"learning_rate": 9.284103471837097e-06,
"loss": 0.644,
"step": 3114
},
{
"epoch": 0.2,
"grad_norm": 0.9066913723945618,
"learning_rate": 9.283574364442066e-06,
"loss": 0.5744,
"step": 3115
},
{
"epoch": 0.2,
"grad_norm": 0.9738790392875671,
"learning_rate": 9.283045076680614e-06,
"loss": 0.6821,
"step": 3116
},
{
"epoch": 0.2,
"grad_norm": 0.9615656733512878,
"learning_rate": 9.282515608575038e-06,
"loss": 0.6394,
"step": 3117
},
{
"epoch": 0.2,
"grad_norm": 0.9014465808868408,
"learning_rate": 9.281985960147625e-06,
"loss": 0.621,
"step": 3118
},
{
"epoch": 0.2,
"grad_norm": 0.8829550743103027,
"learning_rate": 9.28145613142068e-06,
"loss": 0.6598,
"step": 3119
},
{
"epoch": 0.2,
"grad_norm": 0.9063881039619446,
"learning_rate": 9.28092612241651e-06,
"loss": 0.6228,
"step": 3120
},
{
"epoch": 0.2,
"grad_norm": 0.9000431895256042,
"learning_rate": 9.280395933157436e-06,
"loss": 0.5869,
"step": 3121
},
{
"epoch": 0.2,
"grad_norm": 0.8334502577781677,
"learning_rate": 9.279865563665778e-06,
"loss": 0.593,
"step": 3122
},
{
"epoch": 0.2,
"grad_norm": 0.8414939045906067,
"learning_rate": 9.27933501396387e-06,
"loss": 0.5866,
"step": 3123
},
{
"epoch": 0.2,
"grad_norm": 0.8854286670684814,
"learning_rate": 9.27880428407405e-06,
"loss": 0.639,
"step": 3124
},
{
"epoch": 0.2,
"grad_norm": 0.9396377801895142,
"learning_rate": 9.278273374018669e-06,
"loss": 0.6313,
"step": 3125
},
{
"epoch": 0.2,
"grad_norm": 0.9234236478805542,
"learning_rate": 9.277742283820077e-06,
"loss": 0.6578,
"step": 3126
},
{
"epoch": 0.2,
"grad_norm": 0.9088654518127441,
"learning_rate": 9.27721101350064e-06,
"loss": 0.6698,
"step": 3127
},
{
"epoch": 0.2,
"grad_norm": 0.8841193318367004,
"learning_rate": 9.276679563082726e-06,
"loss": 0.6511,
"step": 3128
},
{
"epoch": 0.2,
"grad_norm": 0.8056107759475708,
"learning_rate": 9.276147932588712e-06,
"loss": 0.573,
"step": 3129
},
{
"epoch": 0.2,
"grad_norm": 0.8983877301216125,
"learning_rate": 9.275616122040985e-06,
"loss": 0.5889,
"step": 3130
},
{
"epoch": 0.2,
"grad_norm": 0.9186437129974365,
"learning_rate": 9.275084131461938e-06,
"loss": 0.6494,
"step": 3131
},
{
"epoch": 0.2,
"grad_norm": 0.9824482202529907,
"learning_rate": 9.27455196087397e-06,
"loss": 0.6029,
"step": 3132
},
{
"epoch": 0.2,
"grad_norm": 0.8984844088554382,
"learning_rate": 9.274019610299487e-06,
"loss": 0.6178,
"step": 3133
},
{
"epoch": 0.2,
"grad_norm": 0.8482160568237305,
"learning_rate": 9.273487079760908e-06,
"loss": 0.5949,
"step": 3134
},
{
"epoch": 0.2,
"grad_norm": 0.9106261134147644,
"learning_rate": 9.272954369280654e-06,
"loss": 0.6198,
"step": 3135
},
{
"epoch": 0.2,
"grad_norm": 0.8580856323242188,
"learning_rate": 9.272421478881158e-06,
"loss": 0.6239,
"step": 3136
},
{
"epoch": 0.2,
"grad_norm": 0.9911568760871887,
"learning_rate": 9.271888408584852e-06,
"loss": 0.6535,
"step": 3137
},
{
"epoch": 0.2,
"grad_norm": 0.8783669471740723,
"learning_rate": 9.27135515841419e-06,
"loss": 0.6204,
"step": 3138
},
{
"epoch": 0.2,
"grad_norm": 0.8823960423469543,
"learning_rate": 9.27082172839162e-06,
"loss": 0.6155,
"step": 3139
},
{
"epoch": 0.2,
"grad_norm": 0.9094551801681519,
"learning_rate": 9.270288118539603e-06,
"loss": 0.636,
"step": 3140
},
{
"epoch": 0.2,
"grad_norm": 0.906217098236084,
"learning_rate": 9.26975432888061e-06,
"loss": 0.5808,
"step": 3141
},
{
"epoch": 0.2,
"grad_norm": 0.8561161160469055,
"learning_rate": 9.269220359437114e-06,
"loss": 0.5757,
"step": 3142
},
{
"epoch": 0.2,
"grad_norm": 0.9296371340751648,
"learning_rate": 9.2686862102316e-06,
"loss": 0.6503,
"step": 3143
},
{
"epoch": 0.2,
"grad_norm": 0.9102144837379456,
"learning_rate": 9.268151881286561e-06,
"loss": 0.6316,
"step": 3144
},
{
"epoch": 0.2,
"grad_norm": 0.8436759114265442,
"learning_rate": 9.267617372624494e-06,
"loss": 0.6341,
"step": 3145
},
{
"epoch": 0.2,
"grad_norm": 0.9814794659614563,
"learning_rate": 9.267082684267905e-06,
"loss": 0.6439,
"step": 3146
},
{
"epoch": 0.2,
"grad_norm": 0.968041181564331,
"learning_rate": 9.266547816239309e-06,
"loss": 0.6738,
"step": 3147
},
{
"epoch": 0.2,
"grad_norm": 0.9353750348091125,
"learning_rate": 9.266012768561225e-06,
"loss": 0.6825,
"step": 3148
},
{
"epoch": 0.2,
"grad_norm": 0.9027935266494751,
"learning_rate": 9.265477541256184e-06,
"loss": 0.6578,
"step": 3149
},
{
"epoch": 0.2,
"grad_norm": 0.9193140864372253,
"learning_rate": 9.264942134346723e-06,
"loss": 0.6061,
"step": 3150
},
{
"epoch": 0.2,
"grad_norm": 0.8780162930488586,
"learning_rate": 9.264406547855386e-06,
"loss": 0.5997,
"step": 3151
},
{
"epoch": 0.2,
"grad_norm": 0.9127413630485535,
"learning_rate": 9.263870781804723e-06,
"loss": 0.6124,
"step": 3152
},
{
"epoch": 0.2,
"grad_norm": 0.9376271963119507,
"learning_rate": 9.263334836217295e-06,
"loss": 0.5963,
"step": 3153
},
{
"epoch": 0.2,
"grad_norm": 0.8163601160049438,
"learning_rate": 9.262798711115667e-06,
"loss": 0.626,
"step": 3154
},
{
"epoch": 0.2,
"grad_norm": 0.8610231280326843,
"learning_rate": 9.262262406522415e-06,
"loss": 0.5428,
"step": 3155
},
{
"epoch": 0.2,
"grad_norm": 0.950401782989502,
"learning_rate": 9.261725922460121e-06,
"loss": 0.6314,
"step": 3156
},
{
"epoch": 0.2,
"grad_norm": 0.9740757942199707,
"learning_rate": 9.261189258951372e-06,
"loss": 0.637,
"step": 3157
},
{
"epoch": 0.2,
"grad_norm": 0.9891514778137207,
"learning_rate": 9.26065241601877e-06,
"loss": 0.6871,
"step": 3158
},
{
"epoch": 0.2,
"grad_norm": 0.8673012852668762,
"learning_rate": 9.260115393684914e-06,
"loss": 0.6357,
"step": 3159
},
{
"epoch": 0.2,
"grad_norm": 0.9474377036094666,
"learning_rate": 9.25957819197242e-06,
"loss": 0.616,
"step": 3160
},
{
"epoch": 0.2,
"grad_norm": 0.8863465189933777,
"learning_rate": 9.259040810903906e-06,
"loss": 0.6046,
"step": 3161
},
{
"epoch": 0.2,
"grad_norm": 0.8935105800628662,
"learning_rate": 9.258503250501998e-06,
"loss": 0.6472,
"step": 3162
},
{
"epoch": 0.2,
"grad_norm": 0.9094743728637695,
"learning_rate": 9.257965510789334e-06,
"loss": 0.5834,
"step": 3163
},
{
"epoch": 0.2,
"grad_norm": 0.9533581137657166,
"learning_rate": 9.257427591788555e-06,
"loss": 0.6631,
"step": 3164
},
{
"epoch": 0.2,
"grad_norm": 0.8987277746200562,
"learning_rate": 9.25688949352231e-06,
"loss": 0.6267,
"step": 3165
},
{
"epoch": 0.2,
"grad_norm": 0.8459535241127014,
"learning_rate": 9.256351216013257e-06,
"loss": 0.6366,
"step": 3166
},
{
"epoch": 0.2,
"grad_norm": 0.8984457850456238,
"learning_rate": 9.255812759284062e-06,
"loss": 0.5716,
"step": 3167
},
{
"epoch": 0.2,
"grad_norm": 0.8287543654441833,
"learning_rate": 9.255274123357396e-06,
"loss": 0.5677,
"step": 3168
},
{
"epoch": 0.2,
"grad_norm": 0.9328951239585876,
"learning_rate": 9.254735308255937e-06,
"loss": 0.7044,
"step": 3169
},
{
"epoch": 0.2,
"grad_norm": 0.9265501499176025,
"learning_rate": 9.254196314002379e-06,
"loss": 0.625,
"step": 3170
},
{
"epoch": 0.2,
"grad_norm": 0.959682285785675,
"learning_rate": 9.253657140619412e-06,
"loss": 0.6506,
"step": 3171
},
{
"epoch": 0.2,
"grad_norm": 0.9735859036445618,
"learning_rate": 9.25311778812974e-06,
"loss": 0.6281,
"step": 3172
},
{
"epoch": 0.2,
"grad_norm": 0.9741908311843872,
"learning_rate": 9.252578256556075e-06,
"loss": 0.6645,
"step": 3173
},
{
"epoch": 0.2,
"grad_norm": 0.9076485633850098,
"learning_rate": 9.252038545921131e-06,
"loss": 0.5691,
"step": 3174
},
{
"epoch": 0.2,
"grad_norm": 0.9652928113937378,
"learning_rate": 9.251498656247636e-06,
"loss": 0.6645,
"step": 3175
},
{
"epoch": 0.2,
"grad_norm": 0.9393512010574341,
"learning_rate": 9.250958587558326e-06,
"loss": 0.6011,
"step": 3176
},
{
"epoch": 0.2,
"grad_norm": 0.9639145731925964,
"learning_rate": 9.250418339875934e-06,
"loss": 0.6379,
"step": 3177
},
{
"epoch": 0.2,
"grad_norm": 0.8793298602104187,
"learning_rate": 9.249877913223213e-06,
"loss": 0.6104,
"step": 3178
},
{
"epoch": 0.2,
"grad_norm": 0.8683106899261475,
"learning_rate": 9.249337307622916e-06,
"loss": 0.62,
"step": 3179
},
{
"epoch": 0.2,
"grad_norm": 0.9256559014320374,
"learning_rate": 9.24879652309781e-06,
"loss": 0.642,
"step": 3180
},
{
"epoch": 0.2,
"grad_norm": 0.8257124423980713,
"learning_rate": 9.248255559670661e-06,
"loss": 0.5951,
"step": 3181
},
{
"epoch": 0.2,
"grad_norm": 0.8258576989173889,
"learning_rate": 9.247714417364251e-06,
"loss": 0.6086,
"step": 3182
},
{
"epoch": 0.2,
"grad_norm": 0.8258581161499023,
"learning_rate": 9.24717309620136e-06,
"loss": 0.576,
"step": 3183
},
{
"epoch": 0.2,
"grad_norm": 0.9140012860298157,
"learning_rate": 9.246631596204788e-06,
"loss": 0.5943,
"step": 3184
},
{
"epoch": 0.2,
"grad_norm": 0.9424448609352112,
"learning_rate": 9.246089917397332e-06,
"loss": 0.6434,
"step": 3185
},
{
"epoch": 0.2,
"grad_norm": 0.9118272066116333,
"learning_rate": 9.2455480598018e-06,
"loss": 0.6748,
"step": 3186
},
{
"epoch": 0.2,
"grad_norm": 0.9356390833854675,
"learning_rate": 9.245006023441008e-06,
"loss": 0.6076,
"step": 3187
},
{
"epoch": 0.2,
"grad_norm": 0.9224506616592407,
"learning_rate": 9.24446380833778e-06,
"loss": 0.6892,
"step": 3188
},
{
"epoch": 0.2,
"grad_norm": 0.8446199893951416,
"learning_rate": 9.243921414514947e-06,
"loss": 0.5628,
"step": 3189
},
{
"epoch": 0.2,
"grad_norm": 0.8842172026634216,
"learning_rate": 9.243378841995346e-06,
"loss": 0.5721,
"step": 3190
},
{
"epoch": 0.2,
"grad_norm": 0.9054396748542786,
"learning_rate": 9.242836090801823e-06,
"loss": 0.6142,
"step": 3191
},
{
"epoch": 0.2,
"grad_norm": 0.832400918006897,
"learning_rate": 9.242293160957231e-06,
"loss": 0.6044,
"step": 3192
},
{
"epoch": 0.2,
"grad_norm": 0.9510114789009094,
"learning_rate": 9.241750052484435e-06,
"loss": 0.6215,
"step": 3193
},
{
"epoch": 0.2,
"grad_norm": 0.9344449043273926,
"learning_rate": 9.241206765406298e-06,
"loss": 0.5672,
"step": 3194
},
{
"epoch": 0.2,
"grad_norm": 0.9397872090339661,
"learning_rate": 9.2406632997457e-06,
"loss": 0.5928,
"step": 3195
},
{
"epoch": 0.2,
"grad_norm": 0.9468801021575928,
"learning_rate": 9.240119655525522e-06,
"loss": 0.6609,
"step": 3196
},
{
"epoch": 0.2,
"grad_norm": 0.9130421280860901,
"learning_rate": 9.239575832768655e-06,
"loss": 0.6606,
"step": 3197
},
{
"epoch": 0.2,
"grad_norm": 0.8960924744606018,
"learning_rate": 9.239031831498e-06,
"loss": 0.588,
"step": 3198
},
{
"epoch": 0.2,
"grad_norm": 0.9796780347824097,
"learning_rate": 9.238487651736458e-06,
"loss": 0.6605,
"step": 3199
},
{
"epoch": 0.2,
"grad_norm": 0.9915714859962463,
"learning_rate": 9.237943293506948e-06,
"loss": 0.6517,
"step": 3200
},
{
"epoch": 0.2,
"grad_norm": 0.8834147453308105,
"learning_rate": 9.237398756832387e-06,
"loss": 0.6094,
"step": 3201
},
{
"epoch": 0.2,
"grad_norm": 0.9595925211906433,
"learning_rate": 9.236854041735706e-06,
"loss": 0.5947,
"step": 3202
},
{
"epoch": 0.2,
"grad_norm": 0.9719516038894653,
"learning_rate": 9.236309148239839e-06,
"loss": 0.6186,
"step": 3203
},
{
"epoch": 0.2,
"grad_norm": 0.912463366985321,
"learning_rate": 9.235764076367732e-06,
"loss": 0.5836,
"step": 3204
},
{
"epoch": 0.2,
"grad_norm": 0.9614611864089966,
"learning_rate": 9.235218826142337e-06,
"loss": 0.6131,
"step": 3205
},
{
"epoch": 0.2,
"grad_norm": 0.8435421586036682,
"learning_rate": 9.234673397586606e-06,
"loss": 0.6088,
"step": 3206
},
{
"epoch": 0.2,
"grad_norm": 0.9031780958175659,
"learning_rate": 9.234127790723512e-06,
"loss": 0.6154,
"step": 3207
},
{
"epoch": 0.2,
"grad_norm": 0.9034252166748047,
"learning_rate": 9.233582005576028e-06,
"loss": 0.5843,
"step": 3208
},
{
"epoch": 0.2,
"grad_norm": 0.912809431552887,
"learning_rate": 9.233036042167131e-06,
"loss": 0.634,
"step": 3209
},
{
"epoch": 0.2,
"grad_norm": 0.924806535243988,
"learning_rate": 9.232489900519812e-06,
"loss": 0.6497,
"step": 3210
},
{
"epoch": 0.2,
"grad_norm": 0.9530941247940063,
"learning_rate": 9.231943580657069e-06,
"loss": 0.6562,
"step": 3211
},
{
"epoch": 0.2,
"grad_norm": 0.8763086795806885,
"learning_rate": 9.2313970826019e-06,
"loss": 0.5798,
"step": 3212
},
{
"epoch": 0.2,
"grad_norm": 0.8387221097946167,
"learning_rate": 9.230850406377323e-06,
"loss": 0.5948,
"step": 3213
},
{
"epoch": 0.2,
"grad_norm": 0.8941132426261902,
"learning_rate": 9.230303552006352e-06,
"loss": 0.5912,
"step": 3214
},
{
"epoch": 0.2,
"grad_norm": 0.9189191460609436,
"learning_rate": 9.229756519512014e-06,
"loss": 0.6402,
"step": 3215
},
{
"epoch": 0.2,
"grad_norm": 0.8971881866455078,
"learning_rate": 9.229209308917343e-06,
"loss": 0.6072,
"step": 3216
},
{
"epoch": 0.2,
"grad_norm": 0.910284161567688,
"learning_rate": 9.228661920245383e-06,
"loss": 0.5816,
"step": 3217
},
{
"epoch": 0.2,
"grad_norm": 0.8809064626693726,
"learning_rate": 9.22811435351918e-06,
"loss": 0.5952,
"step": 3218
},
{
"epoch": 0.2,
"grad_norm": 0.9248557686805725,
"learning_rate": 9.227566608761786e-06,
"loss": 0.6442,
"step": 3219
},
{
"epoch": 0.2,
"grad_norm": 0.9311677813529968,
"learning_rate": 9.227018685996272e-06,
"loss": 0.6215,
"step": 3220
},
{
"epoch": 0.2,
"grad_norm": 0.8465821146965027,
"learning_rate": 9.226470585245706e-06,
"loss": 0.6053,
"step": 3221
},
{
"epoch": 0.2,
"grad_norm": 0.9327176213264465,
"learning_rate": 9.225922306533164e-06,
"loss": 0.6709,
"step": 3222
},
{
"epoch": 0.2,
"grad_norm": 0.8878608345985413,
"learning_rate": 9.225373849881739e-06,
"loss": 0.6033,
"step": 3223
},
{
"epoch": 0.2,
"grad_norm": 0.8926795721054077,
"learning_rate": 9.224825215314515e-06,
"loss": 0.6279,
"step": 3224
},
{
"epoch": 0.2,
"grad_norm": 0.9022210240364075,
"learning_rate": 9.224276402854601e-06,
"loss": 0.641,
"step": 3225
},
{
"epoch": 0.2,
"grad_norm": 0.9377365708351135,
"learning_rate": 9.223727412525103e-06,
"loss": 0.6236,
"step": 3226
},
{
"epoch": 0.2,
"grad_norm": 0.9372929334640503,
"learning_rate": 9.223178244349135e-06,
"loss": 0.6389,
"step": 3227
},
{
"epoch": 0.2,
"grad_norm": 0.8741313219070435,
"learning_rate": 9.222628898349825e-06,
"loss": 0.6063,
"step": 3228
},
{
"epoch": 0.2,
"grad_norm": 0.8348528742790222,
"learning_rate": 9.2220793745503e-06,
"loss": 0.5962,
"step": 3229
},
{
"epoch": 0.2,
"grad_norm": 0.8601580858230591,
"learning_rate": 9.221529672973701e-06,
"loss": 0.589,
"step": 3230
},
{
"epoch": 0.2,
"grad_norm": 1.0050134658813477,
"learning_rate": 9.220979793643173e-06,
"loss": 0.6276,
"step": 3231
},
{
"epoch": 0.2,
"grad_norm": 0.9667968153953552,
"learning_rate": 9.220429736581869e-06,
"loss": 0.654,
"step": 3232
},
{
"epoch": 0.2,
"grad_norm": 0.9288026690483093,
"learning_rate": 9.219879501812952e-06,
"loss": 0.5867,
"step": 3233
},
{
"epoch": 0.2,
"grad_norm": 0.9640477895736694,
"learning_rate": 9.219329089359588e-06,
"loss": 0.6481,
"step": 3234
},
{
"epoch": 0.2,
"grad_norm": 1.0388972759246826,
"learning_rate": 9.218778499244953e-06,
"loss": 0.6895,
"step": 3235
},
{
"epoch": 0.21,
"grad_norm": 0.9421420693397522,
"learning_rate": 9.218227731492234e-06,
"loss": 0.6852,
"step": 3236
},
{
"epoch": 0.21,
"grad_norm": 0.9457274079322815,
"learning_rate": 9.217676786124616e-06,
"loss": 0.6247,
"step": 3237
},
{
"epoch": 0.21,
"grad_norm": 0.9554296135902405,
"learning_rate": 9.217125663165303e-06,
"loss": 0.6557,
"step": 3238
},
{
"epoch": 0.21,
"grad_norm": 0.930719792842865,
"learning_rate": 9.216574362637498e-06,
"loss": 0.7073,
"step": 3239
},
{
"epoch": 0.21,
"grad_norm": 0.880737841129303,
"learning_rate": 9.216022884564414e-06,
"loss": 0.6405,
"step": 3240
},
{
"epoch": 0.21,
"grad_norm": 0.9576687812805176,
"learning_rate": 9.215471228969275e-06,
"loss": 0.6455,
"step": 3241
},
{
"epoch": 0.21,
"grad_norm": 0.8890754580497742,
"learning_rate": 9.214919395875306e-06,
"loss": 0.674,
"step": 3242
},
{
"epoch": 0.21,
"grad_norm": 0.8603907823562622,
"learning_rate": 9.214367385305744e-06,
"loss": 0.5467,
"step": 3243
},
{
"epoch": 0.21,
"grad_norm": 0.925334632396698,
"learning_rate": 9.213815197283834e-06,
"loss": 0.6226,
"step": 3244
},
{
"epoch": 0.21,
"grad_norm": 0.9949658513069153,
"learning_rate": 9.21326283183282e-06,
"loss": 0.6078,
"step": 3245
},
{
"epoch": 0.21,
"grad_norm": 0.8808592557907104,
"learning_rate": 9.21271028897597e-06,
"loss": 0.6003,
"step": 3246
},
{
"epoch": 0.21,
"grad_norm": 0.9511841535568237,
"learning_rate": 9.212157568736542e-06,
"loss": 0.6048,
"step": 3247
},
{
"epoch": 0.21,
"grad_norm": 0.8486485481262207,
"learning_rate": 9.211604671137812e-06,
"loss": 0.6194,
"step": 3248
},
{
"epoch": 0.21,
"grad_norm": 0.9710730314254761,
"learning_rate": 9.211051596203061e-06,
"loss": 0.6914,
"step": 3249
},
{
"epoch": 0.21,
"grad_norm": 0.8839832544326782,
"learning_rate": 9.210498343955576e-06,
"loss": 0.6243,
"step": 3250
},
{
"epoch": 0.21,
"grad_norm": 0.9419470429420471,
"learning_rate": 9.209944914418653e-06,
"loss": 0.6847,
"step": 3251
},
{
"epoch": 0.21,
"grad_norm": 0.8645347356796265,
"learning_rate": 9.209391307615596e-06,
"loss": 0.574,
"step": 3252
},
{
"epoch": 0.21,
"grad_norm": 0.9880130887031555,
"learning_rate": 9.208837523569713e-06,
"loss": 0.6631,
"step": 3253
},
{
"epoch": 0.21,
"grad_norm": 0.8735252618789673,
"learning_rate": 9.208283562304326e-06,
"loss": 0.5747,
"step": 3254
},
{
"epoch": 0.21,
"grad_norm": 0.9930894374847412,
"learning_rate": 9.207729423842755e-06,
"loss": 0.6138,
"step": 3255
},
{
"epoch": 0.21,
"grad_norm": 0.9474650025367737,
"learning_rate": 9.207175108208334e-06,
"loss": 0.6524,
"step": 3256
},
{
"epoch": 0.21,
"grad_norm": 0.9482831358909607,
"learning_rate": 9.20662061542441e-06,
"loss": 0.6654,
"step": 3257
},
{
"epoch": 0.21,
"grad_norm": 0.8777074813842773,
"learning_rate": 9.206065945514321e-06,
"loss": 0.6201,
"step": 3258
},
{
"epoch": 0.21,
"grad_norm": 0.9118297696113586,
"learning_rate": 9.20551109850143e-06,
"loss": 0.6074,
"step": 3259
},
{
"epoch": 0.21,
"grad_norm": 0.973640501499176,
"learning_rate": 9.204956074409095e-06,
"loss": 0.6246,
"step": 3260
},
{
"epoch": 0.21,
"grad_norm": 0.9374106526374817,
"learning_rate": 9.204400873260688e-06,
"loss": 0.6306,
"step": 3261
},
{
"epoch": 0.21,
"grad_norm": 0.8689625859260559,
"learning_rate": 9.203845495079587e-06,
"loss": 0.6317,
"step": 3262
},
{
"epoch": 0.21,
"grad_norm": 0.8900114893913269,
"learning_rate": 9.203289939889175e-06,
"loss": 0.6137,
"step": 3263
},
{
"epoch": 0.21,
"grad_norm": 0.862295389175415,
"learning_rate": 9.202734207712847e-06,
"loss": 0.6081,
"step": 3264
},
{
"epoch": 0.21,
"grad_norm": 0.8732759952545166,
"learning_rate": 9.202178298574e-06,
"loss": 0.5917,
"step": 3265
},
{
"epoch": 0.21,
"grad_norm": 0.9287835359573364,
"learning_rate": 9.201622212496043e-06,
"loss": 0.6226,
"step": 3266
},
{
"epoch": 0.21,
"grad_norm": 0.9804710745811462,
"learning_rate": 9.201065949502394e-06,
"loss": 0.6828,
"step": 3267
},
{
"epoch": 0.21,
"grad_norm": 0.907406210899353,
"learning_rate": 9.20050950961647e-06,
"loss": 0.6004,
"step": 3268
},
{
"epoch": 0.21,
"grad_norm": 0.870427668094635,
"learning_rate": 9.199952892861706e-06,
"loss": 0.6121,
"step": 3269
},
{
"epoch": 0.21,
"grad_norm": 0.8811596035957336,
"learning_rate": 9.199396099261532e-06,
"loss": 0.6258,
"step": 3270
},
{
"epoch": 0.21,
"grad_norm": 0.9226367473602295,
"learning_rate": 9.198839128839399e-06,
"loss": 0.6473,
"step": 3271
},
{
"epoch": 0.21,
"grad_norm": 0.8732794523239136,
"learning_rate": 9.198281981618757e-06,
"loss": 0.5947,
"step": 3272
},
{
"epoch": 0.21,
"grad_norm": 0.9539616703987122,
"learning_rate": 9.197724657623066e-06,
"loss": 0.6034,
"step": 3273
},
{
"epoch": 0.21,
"grad_norm": 0.9064382910728455,
"learning_rate": 9.197167156875793e-06,
"loss": 0.6329,
"step": 3274
},
{
"epoch": 0.21,
"grad_norm": 0.9909444451332092,
"learning_rate": 9.19660947940041e-06,
"loss": 0.6264,
"step": 3275
},
{
"epoch": 0.21,
"grad_norm": 0.8617537021636963,
"learning_rate": 9.196051625220401e-06,
"loss": 0.5953,
"step": 3276
},
{
"epoch": 0.21,
"grad_norm": 0.8831681609153748,
"learning_rate": 9.195493594359254e-06,
"loss": 0.6043,
"step": 3277
},
{
"epoch": 0.21,
"grad_norm": 0.9826748371124268,
"learning_rate": 9.19493538684047e-06,
"loss": 0.6699,
"step": 3278
},
{
"epoch": 0.21,
"grad_norm": 0.8774879574775696,
"learning_rate": 9.194377002687547e-06,
"loss": 0.6173,
"step": 3279
},
{
"epoch": 0.21,
"grad_norm": 0.976276159286499,
"learning_rate": 9.193818441924003e-06,
"loss": 0.6248,
"step": 3280
},
{
"epoch": 0.21,
"grad_norm": 0.8805941343307495,
"learning_rate": 9.19325970457335e-06,
"loss": 0.5739,
"step": 3281
},
{
"epoch": 0.21,
"grad_norm": 0.8417159914970398,
"learning_rate": 9.192700790659121e-06,
"loss": 0.5478,
"step": 3282
},
{
"epoch": 0.21,
"grad_norm": 0.958260178565979,
"learning_rate": 9.192141700204844e-06,
"loss": 0.7037,
"step": 3283
},
{
"epoch": 0.21,
"grad_norm": 0.8954302668571472,
"learning_rate": 9.191582433234067e-06,
"loss": 0.6518,
"step": 3284
},
{
"epoch": 0.21,
"grad_norm": 0.8878317475318909,
"learning_rate": 9.191022989770332e-06,
"loss": 0.6168,
"step": 3285
},
{
"epoch": 0.21,
"grad_norm": 0.9823928475379944,
"learning_rate": 9.1904633698372e-06,
"loss": 0.6546,
"step": 3286
},
{
"epoch": 0.21,
"grad_norm": 0.8733540177345276,
"learning_rate": 9.189903573458234e-06,
"loss": 0.6393,
"step": 3287
},
{
"epoch": 0.21,
"grad_norm": 0.9368897080421448,
"learning_rate": 9.189343600657002e-06,
"loss": 0.6342,
"step": 3288
},
{
"epoch": 0.21,
"grad_norm": 0.9004266858100891,
"learning_rate": 9.188783451457086e-06,
"loss": 0.6332,
"step": 3289
},
{
"epoch": 0.21,
"grad_norm": 0.8798797130584717,
"learning_rate": 9.18822312588207e-06,
"loss": 0.6061,
"step": 3290
},
{
"epoch": 0.21,
"grad_norm": 0.8371910452842712,
"learning_rate": 9.187662623955548e-06,
"loss": 0.5436,
"step": 3291
},
{
"epoch": 0.21,
"grad_norm": 0.9449594616889954,
"learning_rate": 9.18710194570112e-06,
"loss": 0.629,
"step": 3292
},
{
"epoch": 0.21,
"grad_norm": 0.8512078523635864,
"learning_rate": 9.186541091142397e-06,
"loss": 0.5525,
"step": 3293
},
{
"epoch": 0.21,
"grad_norm": 0.9958682656288147,
"learning_rate": 9.18598006030299e-06,
"loss": 0.6585,
"step": 3294
},
{
"epoch": 0.21,
"grad_norm": 0.8955892324447632,
"learning_rate": 9.185418853206528e-06,
"loss": 0.5793,
"step": 3295
},
{
"epoch": 0.21,
"grad_norm": 1.0014921426773071,
"learning_rate": 9.184857469876635e-06,
"loss": 0.6248,
"step": 3296
},
{
"epoch": 0.21,
"grad_norm": 0.9238271117210388,
"learning_rate": 9.184295910336953e-06,
"loss": 0.6186,
"step": 3297
},
{
"epoch": 0.21,
"grad_norm": 0.9311379790306091,
"learning_rate": 9.183734174611125e-06,
"loss": 0.702,
"step": 3298
},
{
"epoch": 0.21,
"grad_norm": 0.9323460459709167,
"learning_rate": 9.183172262722807e-06,
"loss": 0.7064,
"step": 3299
},
{
"epoch": 0.21,
"grad_norm": 0.8786803483963013,
"learning_rate": 9.182610174695656e-06,
"loss": 0.6119,
"step": 3300
},
{
"epoch": 0.21,
"grad_norm": 0.8774591088294983,
"learning_rate": 9.182047910553342e-06,
"loss": 0.6455,
"step": 3301
},
{
"epoch": 0.21,
"grad_norm": 0.8772428035736084,
"learning_rate": 9.181485470319537e-06,
"loss": 0.6336,
"step": 3302
},
{
"epoch": 0.21,
"grad_norm": 0.9135443568229675,
"learning_rate": 9.180922854017927e-06,
"loss": 0.5964,
"step": 3303
},
{
"epoch": 0.21,
"grad_norm": 0.9094753861427307,
"learning_rate": 9.1803600616722e-06,
"loss": 0.6492,
"step": 3304
},
{
"epoch": 0.21,
"grad_norm": 1.001076340675354,
"learning_rate": 9.179797093306053e-06,
"loss": 0.6767,
"step": 3305
},
{
"epoch": 0.21,
"grad_norm": 0.989811360836029,
"learning_rate": 9.17923394894319e-06,
"loss": 0.6325,
"step": 3306
},
{
"epoch": 0.21,
"grad_norm": 0.8956232070922852,
"learning_rate": 9.178670628607325e-06,
"loss": 0.6254,
"step": 3307
},
{
"epoch": 0.21,
"grad_norm": 0.8942602276802063,
"learning_rate": 9.178107132322174e-06,
"loss": 0.5803,
"step": 3308
},
{
"epoch": 0.21,
"grad_norm": 0.9630834460258484,
"learning_rate": 9.177543460111469e-06,
"loss": 0.6188,
"step": 3309
},
{
"epoch": 0.21,
"grad_norm": 0.912510097026825,
"learning_rate": 9.17697961199894e-06,
"loss": 0.6383,
"step": 3310
},
{
"epoch": 0.21,
"grad_norm": 0.9050446152687073,
"learning_rate": 9.176415588008332e-06,
"loss": 0.67,
"step": 3311
},
{
"epoch": 0.21,
"grad_norm": 0.9326666593551636,
"learning_rate": 9.175851388163391e-06,
"loss": 0.6253,
"step": 3312
},
{
"epoch": 0.21,
"grad_norm": 0.933397114276886,
"learning_rate": 9.175287012487874e-06,
"loss": 0.6303,
"step": 3313
},
{
"epoch": 0.21,
"grad_norm": 0.8903535604476929,
"learning_rate": 9.174722461005546e-06,
"loss": 0.6088,
"step": 3314
},
{
"epoch": 0.21,
"grad_norm": 0.9588652849197388,
"learning_rate": 9.174157733740178e-06,
"loss": 0.6508,
"step": 3315
},
{
"epoch": 0.21,
"grad_norm": 0.9236728549003601,
"learning_rate": 9.173592830715548e-06,
"loss": 0.5812,
"step": 3316
},
{
"epoch": 0.21,
"grad_norm": 0.9767409563064575,
"learning_rate": 9.173027751955444e-06,
"loss": 0.6523,
"step": 3317
},
{
"epoch": 0.21,
"grad_norm": 0.8604898452758789,
"learning_rate": 9.172462497483658e-06,
"loss": 0.555,
"step": 3318
},
{
"epoch": 0.21,
"grad_norm": 0.9616580009460449,
"learning_rate": 9.17189706732399e-06,
"loss": 0.6045,
"step": 3319
},
{
"epoch": 0.21,
"grad_norm": 1.0537388324737549,
"learning_rate": 9.171331461500253e-06,
"loss": 0.6657,
"step": 3320
},
{
"epoch": 0.21,
"grad_norm": 0.8993361592292786,
"learning_rate": 9.170765680036256e-06,
"loss": 0.6046,
"step": 3321
},
{
"epoch": 0.21,
"grad_norm": 0.8810584545135498,
"learning_rate": 9.170199722955825e-06,
"loss": 0.6191,
"step": 3322
},
{
"epoch": 0.21,
"grad_norm": 0.8661196231842041,
"learning_rate": 9.169633590282793e-06,
"loss": 0.608,
"step": 3323
},
{
"epoch": 0.21,
"grad_norm": 0.9606330990791321,
"learning_rate": 9.169067282040994e-06,
"loss": 0.649,
"step": 3324
},
{
"epoch": 0.21,
"grad_norm": 0.8650776147842407,
"learning_rate": 9.168500798254275e-06,
"loss": 0.5795,
"step": 3325
},
{
"epoch": 0.21,
"grad_norm": 0.9146811366081238,
"learning_rate": 9.167934138946489e-06,
"loss": 0.6595,
"step": 3326
},
{
"epoch": 0.21,
"grad_norm": 1.0079501867294312,
"learning_rate": 9.167367304141494e-06,
"loss": 0.6568,
"step": 3327
},
{
"epoch": 0.21,
"grad_norm": 0.9679005146026611,
"learning_rate": 9.166800293863161e-06,
"loss": 0.6695,
"step": 3328
},
{
"epoch": 0.21,
"grad_norm": 0.9201866984367371,
"learning_rate": 9.166233108135362e-06,
"loss": 0.5872,
"step": 3329
},
{
"epoch": 0.21,
"grad_norm": 0.9234635233879089,
"learning_rate": 9.165665746981982e-06,
"loss": 0.6317,
"step": 3330
},
{
"epoch": 0.21,
"grad_norm": 0.8861828446388245,
"learning_rate": 9.165098210426905e-06,
"loss": 0.6627,
"step": 3331
},
{
"epoch": 0.21,
"grad_norm": 0.8471156358718872,
"learning_rate": 9.164530498494035e-06,
"loss": 0.6388,
"step": 3332
},
{
"epoch": 0.21,
"grad_norm": 0.898435115814209,
"learning_rate": 9.163962611207272e-06,
"loss": 0.6261,
"step": 3333
},
{
"epoch": 0.21,
"grad_norm": 0.9174916744232178,
"learning_rate": 9.163394548590529e-06,
"loss": 0.6421,
"step": 3334
},
{
"epoch": 0.21,
"grad_norm": 0.922631025314331,
"learning_rate": 9.162826310667725e-06,
"loss": 0.641,
"step": 3335
},
{
"epoch": 0.21,
"grad_norm": 0.9309580326080322,
"learning_rate": 9.162257897462784e-06,
"loss": 0.6544,
"step": 3336
},
{
"epoch": 0.21,
"grad_norm": 0.8594711422920227,
"learning_rate": 9.161689308999646e-06,
"loss": 0.6029,
"step": 3337
},
{
"epoch": 0.21,
"grad_norm": 0.969755232334137,
"learning_rate": 9.161120545302246e-06,
"loss": 0.6378,
"step": 3338
},
{
"epoch": 0.21,
"grad_norm": 0.9250763058662415,
"learning_rate": 9.160551606394537e-06,
"loss": 0.6754,
"step": 3339
},
{
"epoch": 0.21,
"grad_norm": 0.910316526889801,
"learning_rate": 9.159982492300473e-06,
"loss": 0.6433,
"step": 3340
},
{
"epoch": 0.21,
"grad_norm": 0.9393495321273804,
"learning_rate": 9.159413203044017e-06,
"loss": 0.614,
"step": 3341
},
{
"epoch": 0.21,
"grad_norm": 0.9090781211853027,
"learning_rate": 9.158843738649141e-06,
"loss": 0.601,
"step": 3342
},
{
"epoch": 0.21,
"grad_norm": 0.8957191705703735,
"learning_rate": 9.158274099139823e-06,
"loss": 0.6071,
"step": 3343
},
{
"epoch": 0.21,
"grad_norm": 0.8667554259300232,
"learning_rate": 9.157704284540047e-06,
"loss": 0.5847,
"step": 3344
},
{
"epoch": 0.21,
"grad_norm": 0.904606819152832,
"learning_rate": 9.15713429487381e-06,
"loss": 0.6435,
"step": 3345
},
{
"epoch": 0.21,
"grad_norm": 0.8986235857009888,
"learning_rate": 9.156564130165106e-06,
"loss": 0.5976,
"step": 3346
},
{
"epoch": 0.21,
"grad_norm": 0.9764082431793213,
"learning_rate": 9.155993790437949e-06,
"loss": 0.6332,
"step": 3347
},
{
"epoch": 0.21,
"grad_norm": 0.8578452467918396,
"learning_rate": 9.155423275716351e-06,
"loss": 0.5981,
"step": 3348
},
{
"epoch": 0.21,
"grad_norm": 0.8971353769302368,
"learning_rate": 9.154852586024332e-06,
"loss": 0.6173,
"step": 3349
},
{
"epoch": 0.21,
"grad_norm": 0.8360897302627563,
"learning_rate": 9.154281721385928e-06,
"loss": 0.6029,
"step": 3350
},
{
"epoch": 0.21,
"grad_norm": 0.9505079984664917,
"learning_rate": 9.153710681825169e-06,
"loss": 0.6472,
"step": 3351
},
{
"epoch": 0.21,
"grad_norm": 0.8876816034317017,
"learning_rate": 9.153139467366103e-06,
"loss": 0.6335,
"step": 3352
},
{
"epoch": 0.21,
"grad_norm": 0.8487616777420044,
"learning_rate": 9.152568078032783e-06,
"loss": 0.6377,
"step": 3353
},
{
"epoch": 0.21,
"grad_norm": 0.9489740133285522,
"learning_rate": 9.151996513849267e-06,
"loss": 0.6148,
"step": 3354
},
{
"epoch": 0.21,
"grad_norm": 0.7857329249382019,
"learning_rate": 9.151424774839622e-06,
"loss": 0.5779,
"step": 3355
},
{
"epoch": 0.21,
"grad_norm": 0.8584344983100891,
"learning_rate": 9.15085286102792e-06,
"loss": 0.5893,
"step": 3356
},
{
"epoch": 0.21,
"grad_norm": 0.8409185409545898,
"learning_rate": 9.150280772438245e-06,
"loss": 0.6258,
"step": 3357
},
{
"epoch": 0.21,
"grad_norm": 0.8375939726829529,
"learning_rate": 9.149708509094684e-06,
"loss": 0.5923,
"step": 3358
},
{
"epoch": 0.21,
"grad_norm": 0.9026387333869934,
"learning_rate": 9.149136071021333e-06,
"loss": 0.6251,
"step": 3359
},
{
"epoch": 0.21,
"grad_norm": 0.8975716233253479,
"learning_rate": 9.148563458242296e-06,
"loss": 0.6533,
"step": 3360
},
{
"epoch": 0.21,
"grad_norm": 0.8834118247032166,
"learning_rate": 9.147990670781683e-06,
"loss": 0.6297,
"step": 3361
},
{
"epoch": 0.21,
"grad_norm": 0.8428575992584229,
"learning_rate": 9.147417708663615e-06,
"loss": 0.5204,
"step": 3362
},
{
"epoch": 0.21,
"grad_norm": 0.8661702871322632,
"learning_rate": 9.146844571912213e-06,
"loss": 0.6314,
"step": 3363
},
{
"epoch": 0.21,
"grad_norm": 0.9363715052604675,
"learning_rate": 9.146271260551614e-06,
"loss": 0.6431,
"step": 3364
},
{
"epoch": 0.21,
"grad_norm": 0.9469258785247803,
"learning_rate": 9.145697774605953e-06,
"loss": 0.6139,
"step": 3365
},
{
"epoch": 0.21,
"grad_norm": 0.9144854545593262,
"learning_rate": 9.145124114099382e-06,
"loss": 0.6105,
"step": 3366
},
{
"epoch": 0.21,
"grad_norm": 1.001625657081604,
"learning_rate": 9.144550279056055e-06,
"loss": 0.5842,
"step": 3367
},
{
"epoch": 0.21,
"grad_norm": 0.9497262239456177,
"learning_rate": 9.143976269500133e-06,
"loss": 0.5686,
"step": 3368
},
{
"epoch": 0.21,
"grad_norm": 0.9559330940246582,
"learning_rate": 9.143402085455785e-06,
"loss": 0.6098,
"step": 3369
},
{
"epoch": 0.21,
"grad_norm": 0.8853155374526978,
"learning_rate": 9.142827726947193e-06,
"loss": 0.6347,
"step": 3370
},
{
"epoch": 0.21,
"grad_norm": 0.9385725855827332,
"learning_rate": 9.142253193998533e-06,
"loss": 0.6098,
"step": 3371
},
{
"epoch": 0.21,
"grad_norm": 0.8995375037193298,
"learning_rate": 9.141678486634002e-06,
"loss": 0.6358,
"step": 3372
},
{
"epoch": 0.21,
"grad_norm": 0.8732660412788391,
"learning_rate": 9.1411036048778e-06,
"loss": 0.6316,
"step": 3373
},
{
"epoch": 0.21,
"grad_norm": 0.8813968896865845,
"learning_rate": 9.140528548754128e-06,
"loss": 0.6106,
"step": 3374
},
{
"epoch": 0.21,
"grad_norm": 0.8351157307624817,
"learning_rate": 9.139953318287204e-06,
"loss": 0.5437,
"step": 3375
},
{
"epoch": 0.21,
"grad_norm": 0.9750312566757202,
"learning_rate": 9.139377913501247e-06,
"loss": 0.63,
"step": 3376
},
{
"epoch": 0.21,
"grad_norm": 0.8909156322479248,
"learning_rate": 9.138802334420486e-06,
"loss": 0.58,
"step": 3377
},
{
"epoch": 0.21,
"grad_norm": 0.9233285188674927,
"learning_rate": 9.138226581069158e-06,
"loss": 0.6109,
"step": 3378
},
{
"epoch": 0.21,
"grad_norm": 0.9473268985748291,
"learning_rate": 9.137650653471505e-06,
"loss": 0.6883,
"step": 3379
},
{
"epoch": 0.21,
"grad_norm": 0.9071610569953918,
"learning_rate": 9.137074551651774e-06,
"loss": 0.6187,
"step": 3380
},
{
"epoch": 0.21,
"grad_norm": 0.9321165084838867,
"learning_rate": 9.136498275634226e-06,
"loss": 0.649,
"step": 3381
},
{
"epoch": 0.21,
"grad_norm": 0.885661780834198,
"learning_rate": 9.135921825443125e-06,
"loss": 0.6455,
"step": 3382
},
{
"epoch": 0.21,
"grad_norm": 0.9413583278656006,
"learning_rate": 9.135345201102745e-06,
"loss": 0.5993,
"step": 3383
},
{
"epoch": 0.21,
"grad_norm": 0.9824182391166687,
"learning_rate": 9.134768402637366e-06,
"loss": 0.6636,
"step": 3384
},
{
"epoch": 0.21,
"grad_norm": 0.8217403292655945,
"learning_rate": 9.13419143007127e-06,
"loss": 0.6168,
"step": 3385
},
{
"epoch": 0.21,
"grad_norm": 0.8587862253189087,
"learning_rate": 9.133614283428757e-06,
"loss": 0.58,
"step": 3386
},
{
"epoch": 0.21,
"grad_norm": 1.0002095699310303,
"learning_rate": 9.133036962734127e-06,
"loss": 0.6247,
"step": 3387
},
{
"epoch": 0.21,
"grad_norm": 0.8633260726928711,
"learning_rate": 9.132459468011686e-06,
"loss": 0.6379,
"step": 3388
},
{
"epoch": 0.21,
"grad_norm": 0.9585233926773071,
"learning_rate": 9.131881799285754e-06,
"loss": 0.6496,
"step": 3389
},
{
"epoch": 0.21,
"grad_norm": 1.0030509233474731,
"learning_rate": 9.131303956580653e-06,
"loss": 0.6768,
"step": 3390
},
{
"epoch": 0.21,
"grad_norm": 0.8934270143508911,
"learning_rate": 9.130725939920712e-06,
"loss": 0.5957,
"step": 3391
},
{
"epoch": 0.21,
"grad_norm": 0.9003897905349731,
"learning_rate": 9.130147749330275e-06,
"loss": 0.6296,
"step": 3392
},
{
"epoch": 0.21,
"grad_norm": 0.8862766027450562,
"learning_rate": 9.129569384833682e-06,
"loss": 0.636,
"step": 3393
},
{
"epoch": 0.22,
"grad_norm": 0.9102445840835571,
"learning_rate": 9.128990846455287e-06,
"loss": 0.66,
"step": 3394
},
{
"epoch": 0.22,
"grad_norm": 0.9600756168365479,
"learning_rate": 9.128412134219453e-06,
"loss": 0.5945,
"step": 3395
},
{
"epoch": 0.22,
"grad_norm": 0.9160851240158081,
"learning_rate": 9.127833248150546e-06,
"loss": 0.6609,
"step": 3396
},
{
"epoch": 0.22,
"grad_norm": 0.9181495904922485,
"learning_rate": 9.12725418827294e-06,
"loss": 0.5946,
"step": 3397
},
{
"epoch": 0.22,
"grad_norm": 0.870098888874054,
"learning_rate": 9.126674954611016e-06,
"loss": 0.6147,
"step": 3398
},
{
"epoch": 0.22,
"grad_norm": 0.8894675374031067,
"learning_rate": 9.12609554718917e-06,
"loss": 0.6234,
"step": 3399
},
{
"epoch": 0.22,
"grad_norm": 0.8688364028930664,
"learning_rate": 9.12551596603179e-06,
"loss": 0.6225,
"step": 3400
},
{
"epoch": 0.22,
"grad_norm": 0.8926935195922852,
"learning_rate": 9.124936211163284e-06,
"loss": 0.623,
"step": 3401
},
{
"epoch": 0.22,
"grad_norm": 0.8921478390693665,
"learning_rate": 9.124356282608065e-06,
"loss": 0.5813,
"step": 3402
},
{
"epoch": 0.22,
"grad_norm": 0.9715839624404907,
"learning_rate": 9.123776180390552e-06,
"loss": 0.6453,
"step": 3403
},
{
"epoch": 0.22,
"grad_norm": 0.95328289270401,
"learning_rate": 9.123195904535167e-06,
"loss": 0.5729,
"step": 3404
},
{
"epoch": 0.22,
"grad_norm": 0.9020276665687561,
"learning_rate": 9.122615455066348e-06,
"loss": 0.6469,
"step": 3405
},
{
"epoch": 0.22,
"grad_norm": 0.9650804996490479,
"learning_rate": 9.122034832008532e-06,
"loss": 0.6088,
"step": 3406
},
{
"epoch": 0.22,
"grad_norm": 0.9247978329658508,
"learning_rate": 9.12145403538617e-06,
"loss": 0.6298,
"step": 3407
},
{
"epoch": 0.22,
"grad_norm": 0.9034278988838196,
"learning_rate": 9.120873065223716e-06,
"loss": 0.6255,
"step": 3408
},
{
"epoch": 0.22,
"grad_norm": 0.9190613031387329,
"learning_rate": 9.120291921545633e-06,
"loss": 0.5856,
"step": 3409
},
{
"epoch": 0.22,
"grad_norm": 1.0126057863235474,
"learning_rate": 9.11971060437639e-06,
"loss": 0.6682,
"step": 3410
},
{
"epoch": 0.22,
"grad_norm": 0.9093670845031738,
"learning_rate": 9.119129113740463e-06,
"loss": 0.5967,
"step": 3411
},
{
"epoch": 0.22,
"grad_norm": 0.8827959299087524,
"learning_rate": 9.118547449662342e-06,
"loss": 0.6004,
"step": 3412
},
{
"epoch": 0.22,
"grad_norm": 0.9230462312698364,
"learning_rate": 9.117965612166514e-06,
"loss": 0.636,
"step": 3413
},
{
"epoch": 0.22,
"grad_norm": 0.8487642407417297,
"learning_rate": 9.117383601277478e-06,
"loss": 0.6186,
"step": 3414
},
{
"epoch": 0.22,
"grad_norm": 0.8755055665969849,
"learning_rate": 9.116801417019744e-06,
"loss": 0.581,
"step": 3415
},
{
"epoch": 0.22,
"grad_norm": 0.9088988304138184,
"learning_rate": 9.116219059417821e-06,
"loss": 0.6535,
"step": 3416
},
{
"epoch": 0.22,
"grad_norm": 0.9298532009124756,
"learning_rate": 9.115636528496236e-06,
"loss": 0.6107,
"step": 3417
},
{
"epoch": 0.22,
"grad_norm": 0.8741108775138855,
"learning_rate": 9.115053824279511e-06,
"loss": 0.6134,
"step": 3418
},
{
"epoch": 0.22,
"grad_norm": 0.9260140657424927,
"learning_rate": 9.114470946792187e-06,
"loss": 0.6243,
"step": 3419
},
{
"epoch": 0.22,
"grad_norm": 0.8403961062431335,
"learning_rate": 9.113887896058805e-06,
"loss": 0.6119,
"step": 3420
},
{
"epoch": 0.22,
"grad_norm": 0.9289052486419678,
"learning_rate": 9.11330467210391e-06,
"loss": 0.6269,
"step": 3421
},
{
"epoch": 0.22,
"grad_norm": 0.9038977026939392,
"learning_rate": 9.11272127495207e-06,
"loss": 0.5929,
"step": 3422
},
{
"epoch": 0.22,
"grad_norm": 0.8994903564453125,
"learning_rate": 9.112137704627842e-06,
"loss": 0.6708,
"step": 3423
},
{
"epoch": 0.22,
"grad_norm": 0.8599143028259277,
"learning_rate": 9.1115539611558e-06,
"loss": 0.597,
"step": 3424
},
{
"epoch": 0.22,
"grad_norm": 0.9443843960762024,
"learning_rate": 9.110970044560524e-06,
"loss": 0.6339,
"step": 3425
},
{
"epoch": 0.22,
"grad_norm": 0.8927295804023743,
"learning_rate": 9.1103859548666e-06,
"loss": 0.6615,
"step": 3426
},
{
"epoch": 0.22,
"grad_norm": 1.0299628973007202,
"learning_rate": 9.109801692098624e-06,
"loss": 0.6202,
"step": 3427
},
{
"epoch": 0.22,
"grad_norm": 0.8992637991905212,
"learning_rate": 9.109217256281196e-06,
"loss": 0.5849,
"step": 3428
},
{
"epoch": 0.22,
"grad_norm": 0.9295695424079895,
"learning_rate": 9.108632647438922e-06,
"loss": 0.6528,
"step": 3429
},
{
"epoch": 0.22,
"grad_norm": 0.9012725949287415,
"learning_rate": 9.108047865596421e-06,
"loss": 0.6073,
"step": 3430
},
{
"epoch": 0.22,
"grad_norm": 0.9266906380653381,
"learning_rate": 9.107462910778316e-06,
"loss": 0.5892,
"step": 3431
},
{
"epoch": 0.22,
"grad_norm": 0.8764145970344543,
"learning_rate": 9.106877783009236e-06,
"loss": 0.6318,
"step": 3432
},
{
"epoch": 0.22,
"grad_norm": 0.9359897375106812,
"learning_rate": 9.106292482313819e-06,
"loss": 0.6241,
"step": 3433
},
{
"epoch": 0.22,
"grad_norm": 0.8579049706459045,
"learning_rate": 9.105707008716712e-06,
"loss": 0.5882,
"step": 3434
},
{
"epoch": 0.22,
"grad_norm": 0.9599249362945557,
"learning_rate": 9.105121362242564e-06,
"loss": 0.6201,
"step": 3435
},
{
"epoch": 0.22,
"grad_norm": 0.8719608187675476,
"learning_rate": 9.104535542916035e-06,
"loss": 0.5912,
"step": 3436
},
{
"epoch": 0.22,
"grad_norm": 0.9004802107810974,
"learning_rate": 9.103949550761795e-06,
"loss": 0.6307,
"step": 3437
},
{
"epoch": 0.22,
"grad_norm": 0.9485353827476501,
"learning_rate": 9.103363385804516e-06,
"loss": 0.6454,
"step": 3438
},
{
"epoch": 0.22,
"grad_norm": 0.9338861703872681,
"learning_rate": 9.102777048068878e-06,
"loss": 0.6493,
"step": 3439
},
{
"epoch": 0.22,
"grad_norm": 0.9316098093986511,
"learning_rate": 9.102190537579572e-06,
"loss": 0.6294,
"step": 3440
},
{
"epoch": 0.22,
"grad_norm": 0.880497932434082,
"learning_rate": 9.101603854361291e-06,
"loss": 0.6074,
"step": 3441
},
{
"epoch": 0.22,
"grad_norm": 0.9129565954208374,
"learning_rate": 9.101016998438743e-06,
"loss": 0.5802,
"step": 3442
},
{
"epoch": 0.22,
"grad_norm": 0.9664899706840515,
"learning_rate": 9.100429969836636e-06,
"loss": 0.6579,
"step": 3443
},
{
"epoch": 0.22,
"grad_norm": 0.928165853023529,
"learning_rate": 9.099842768579685e-06,
"loss": 0.6038,
"step": 3444
},
{
"epoch": 0.22,
"grad_norm": 0.909015953540802,
"learning_rate": 9.099255394692618e-06,
"loss": 0.5444,
"step": 3445
},
{
"epoch": 0.22,
"grad_norm": 0.9096186757087708,
"learning_rate": 9.098667848200167e-06,
"loss": 0.5793,
"step": 3446
},
{
"epoch": 0.22,
"grad_norm": 0.969042956829071,
"learning_rate": 9.09808012912707e-06,
"loss": 0.6421,
"step": 3447
},
{
"epoch": 0.22,
"grad_norm": 0.9990017414093018,
"learning_rate": 9.097492237498076e-06,
"loss": 0.6331,
"step": 3448
},
{
"epoch": 0.22,
"grad_norm": 0.8431956768035889,
"learning_rate": 9.096904173337937e-06,
"loss": 0.6034,
"step": 3449
},
{
"epoch": 0.22,
"grad_norm": 0.967842698097229,
"learning_rate": 9.096315936671416e-06,
"loss": 0.6123,
"step": 3450
},
{
"epoch": 0.22,
"grad_norm": 0.9783948063850403,
"learning_rate": 9.095727527523282e-06,
"loss": 0.6611,
"step": 3451
},
{
"epoch": 0.22,
"grad_norm": 0.9480175971984863,
"learning_rate": 9.095138945918309e-06,
"loss": 0.6269,
"step": 3452
},
{
"epoch": 0.22,
"grad_norm": 0.8809651732444763,
"learning_rate": 9.094550191881281e-06,
"loss": 0.5726,
"step": 3453
},
{
"epoch": 0.22,
"grad_norm": 0.9356509447097778,
"learning_rate": 9.093961265436988e-06,
"loss": 0.6504,
"step": 3454
},
{
"epoch": 0.22,
"grad_norm": 0.8500334024429321,
"learning_rate": 9.093372166610229e-06,
"loss": 0.619,
"step": 3455
},
{
"epoch": 0.22,
"grad_norm": 0.8734151124954224,
"learning_rate": 9.092782895425806e-06,
"loss": 0.5817,
"step": 3456
},
{
"epoch": 0.22,
"grad_norm": 0.8919950723648071,
"learning_rate": 9.092193451908533e-06,
"loss": 0.6438,
"step": 3457
},
{
"epoch": 0.22,
"grad_norm": 0.9189222455024719,
"learning_rate": 9.091603836083231e-06,
"loss": 0.6717,
"step": 3458
},
{
"epoch": 0.22,
"grad_norm": 0.941829264163971,
"learning_rate": 9.091014047974725e-06,
"loss": 0.5565,
"step": 3459
},
{
"epoch": 0.22,
"grad_norm": 0.9333182573318481,
"learning_rate": 9.090424087607848e-06,
"loss": 0.6282,
"step": 3460
},
{
"epoch": 0.22,
"grad_norm": 0.8771211504936218,
"learning_rate": 9.089833955007443e-06,
"loss": 0.5849,
"step": 3461
},
{
"epoch": 0.22,
"grad_norm": 0.9246846437454224,
"learning_rate": 9.089243650198359e-06,
"loss": 0.6186,
"step": 3462
},
{
"epoch": 0.22,
"grad_norm": 0.8576235771179199,
"learning_rate": 9.088653173205449e-06,
"loss": 0.5996,
"step": 3463
},
{
"epoch": 0.22,
"grad_norm": 0.9263531565666199,
"learning_rate": 9.088062524053575e-06,
"loss": 0.6116,
"step": 3464
},
{
"epoch": 0.22,
"grad_norm": 0.8749649524688721,
"learning_rate": 9.087471702767612e-06,
"loss": 0.5922,
"step": 3465
},
{
"epoch": 0.22,
"grad_norm": 0.9297971725463867,
"learning_rate": 9.086880709372434e-06,
"loss": 0.6259,
"step": 3466
},
{
"epoch": 0.22,
"grad_norm": 0.8290271759033203,
"learning_rate": 9.086289543892928e-06,
"loss": 0.5753,
"step": 3467
},
{
"epoch": 0.22,
"grad_norm": 0.9221488833427429,
"learning_rate": 9.085698206353983e-06,
"loss": 0.5982,
"step": 3468
},
{
"epoch": 0.22,
"grad_norm": 0.8664331436157227,
"learning_rate": 9.085106696780499e-06,
"loss": 0.5829,
"step": 3469
},
{
"epoch": 0.22,
"grad_norm": 0.943659245967865,
"learning_rate": 9.084515015197384e-06,
"loss": 0.6722,
"step": 3470
},
{
"epoch": 0.22,
"grad_norm": 0.9838310480117798,
"learning_rate": 9.08392316162955e-06,
"loss": 0.6407,
"step": 3471
},
{
"epoch": 0.22,
"grad_norm": 0.9057297110557556,
"learning_rate": 9.083331136101921e-06,
"loss": 0.6113,
"step": 3472
},
{
"epoch": 0.22,
"grad_norm": 0.872379720211029,
"learning_rate": 9.08273893863942e-06,
"loss": 0.5884,
"step": 3473
},
{
"epoch": 0.22,
"grad_norm": 0.9110143184661865,
"learning_rate": 9.082146569266988e-06,
"loss": 0.5865,
"step": 3474
},
{
"epoch": 0.22,
"grad_norm": 0.9769248366355896,
"learning_rate": 9.081554028009562e-06,
"loss": 0.6642,
"step": 3475
},
{
"epoch": 0.22,
"grad_norm": 0.8390948176383972,
"learning_rate": 9.080961314892096e-06,
"loss": 0.6116,
"step": 3476
},
{
"epoch": 0.22,
"grad_norm": 0.9101285338401794,
"learning_rate": 9.080368429939546e-06,
"loss": 0.6063,
"step": 3477
},
{
"epoch": 0.22,
"grad_norm": 0.9952099323272705,
"learning_rate": 9.079775373176874e-06,
"loss": 0.6302,
"step": 3478
},
{
"epoch": 0.22,
"grad_norm": 0.9361991286277771,
"learning_rate": 9.079182144629055e-06,
"loss": 0.6237,
"step": 3479
},
{
"epoch": 0.22,
"grad_norm": 0.8918977975845337,
"learning_rate": 9.078588744321067e-06,
"loss": 0.5958,
"step": 3480
},
{
"epoch": 0.22,
"grad_norm": 0.9270057082176208,
"learning_rate": 9.077995172277894e-06,
"loss": 0.598,
"step": 3481
},
{
"epoch": 0.22,
"grad_norm": 0.9182881712913513,
"learning_rate": 9.07740142852453e-06,
"loss": 0.6355,
"step": 3482
},
{
"epoch": 0.22,
"grad_norm": 0.9537854194641113,
"learning_rate": 9.076807513085976e-06,
"loss": 0.6256,
"step": 3483
},
{
"epoch": 0.22,
"grad_norm": 0.9619026780128479,
"learning_rate": 9.076213425987242e-06,
"loss": 0.6517,
"step": 3484
},
{
"epoch": 0.22,
"grad_norm": 0.9318684339523315,
"learning_rate": 9.07561916725334e-06,
"loss": 0.6745,
"step": 3485
},
{
"epoch": 0.22,
"grad_norm": 0.9609551429748535,
"learning_rate": 9.075024736909292e-06,
"loss": 0.6062,
"step": 3486
},
{
"epoch": 0.22,
"grad_norm": 0.9244940280914307,
"learning_rate": 9.074430134980129e-06,
"loss": 0.6348,
"step": 3487
},
{
"epoch": 0.22,
"grad_norm": 0.9906083941459656,
"learning_rate": 9.073835361490885e-06,
"loss": 0.6681,
"step": 3488
},
{
"epoch": 0.22,
"grad_norm": 0.9201457500457764,
"learning_rate": 9.073240416466609e-06,
"loss": 0.6429,
"step": 3489
},
{
"epoch": 0.22,
"grad_norm": 0.8737314939498901,
"learning_rate": 9.072645299932347e-06,
"loss": 0.6151,
"step": 3490
},
{
"epoch": 0.22,
"grad_norm": 0.8806108832359314,
"learning_rate": 9.07205001191316e-06,
"loss": 0.6441,
"step": 3491
},
{
"epoch": 0.22,
"grad_norm": 0.8998177647590637,
"learning_rate": 9.071454552434111e-06,
"loss": 0.6407,
"step": 3492
},
{
"epoch": 0.22,
"grad_norm": 0.8950275778770447,
"learning_rate": 9.070858921520276e-06,
"loss": 0.6341,
"step": 3493
},
{
"epoch": 0.22,
"grad_norm": 0.8834101557731628,
"learning_rate": 9.070263119196734e-06,
"loss": 0.6065,
"step": 3494
},
{
"epoch": 0.22,
"grad_norm": 0.9296960830688477,
"learning_rate": 9.06966714548857e-06,
"loss": 0.6307,
"step": 3495
},
{
"epoch": 0.22,
"grad_norm": 0.8565431833267212,
"learning_rate": 9.069071000420879e-06,
"loss": 0.607,
"step": 3496
},
{
"epoch": 0.22,
"grad_norm": 0.9660019874572754,
"learning_rate": 9.068474684018765e-06,
"loss": 0.5671,
"step": 3497
},
{
"epoch": 0.22,
"grad_norm": 0.9031816124916077,
"learning_rate": 9.067878196307334e-06,
"loss": 0.6158,
"step": 3498
},
{
"epoch": 0.22,
"grad_norm": 0.8707241415977478,
"learning_rate": 9.067281537311705e-06,
"loss": 0.5942,
"step": 3499
},
{
"epoch": 0.22,
"grad_norm": 0.9110444188117981,
"learning_rate": 9.066684707056999e-06,
"loss": 0.5835,
"step": 3500
},
{
"epoch": 0.22,
"grad_norm": 0.9043798446655273,
"learning_rate": 9.066087705568346e-06,
"loss": 0.6047,
"step": 3501
},
{
"epoch": 0.22,
"grad_norm": 0.9171016216278076,
"learning_rate": 9.065490532870884e-06,
"loss": 0.5593,
"step": 3502
},
{
"epoch": 0.22,
"grad_norm": 0.9416684508323669,
"learning_rate": 9.06489318898976e-06,
"loss": 0.6465,
"step": 3503
},
{
"epoch": 0.22,
"grad_norm": 0.9238849878311157,
"learning_rate": 9.064295673950125e-06,
"loss": 0.653,
"step": 3504
},
{
"epoch": 0.22,
"grad_norm": 0.9581873416900635,
"learning_rate": 9.063697987777136e-06,
"loss": 0.6547,
"step": 3505
},
{
"epoch": 0.22,
"grad_norm": 0.907537579536438,
"learning_rate": 9.063100130495962e-06,
"loss": 0.6362,
"step": 3506
},
{
"epoch": 0.22,
"grad_norm": 0.8580865859985352,
"learning_rate": 9.062502102131777e-06,
"loss": 0.6312,
"step": 3507
},
{
"epoch": 0.22,
"grad_norm": 0.9068456888198853,
"learning_rate": 9.06190390270976e-06,
"loss": 0.6583,
"step": 3508
},
{
"epoch": 0.22,
"grad_norm": 0.8349429368972778,
"learning_rate": 9.0613055322551e-06,
"loss": 0.6388,
"step": 3509
},
{
"epoch": 0.22,
"grad_norm": 0.8973667621612549,
"learning_rate": 9.060706990792993e-06,
"loss": 0.6076,
"step": 3510
},
{
"epoch": 0.22,
"grad_norm": 0.8447120189666748,
"learning_rate": 9.06010827834864e-06,
"loss": 0.6158,
"step": 3511
},
{
"epoch": 0.22,
"grad_norm": 0.8853378295898438,
"learning_rate": 9.059509394947252e-06,
"loss": 0.6026,
"step": 3512
},
{
"epoch": 0.22,
"grad_norm": 0.9272050857543945,
"learning_rate": 9.058910340614045e-06,
"loss": 0.6184,
"step": 3513
},
{
"epoch": 0.22,
"grad_norm": 0.8689481616020203,
"learning_rate": 9.058311115374244e-06,
"loss": 0.6424,
"step": 3514
},
{
"epoch": 0.22,
"grad_norm": 0.8084876537322998,
"learning_rate": 9.057711719253077e-06,
"loss": 0.5953,
"step": 3515
},
{
"epoch": 0.22,
"grad_norm": 0.9154835343360901,
"learning_rate": 9.057112152275788e-06,
"loss": 0.6471,
"step": 3516
},
{
"epoch": 0.22,
"grad_norm": 0.9204840660095215,
"learning_rate": 9.05651241446762e-06,
"loss": 0.6131,
"step": 3517
},
{
"epoch": 0.22,
"grad_norm": 0.8655226826667786,
"learning_rate": 9.055912505853826e-06,
"loss": 0.577,
"step": 3518
},
{
"epoch": 0.22,
"grad_norm": 0.9272779226303101,
"learning_rate": 9.055312426459663e-06,
"loss": 0.6352,
"step": 3519
},
{
"epoch": 0.22,
"grad_norm": 0.955590009689331,
"learning_rate": 9.054712176310405e-06,
"loss": 0.7019,
"step": 3520
},
{
"epoch": 0.22,
"grad_norm": 0.898430585861206,
"learning_rate": 9.05411175543132e-06,
"loss": 0.5869,
"step": 3521
},
{
"epoch": 0.22,
"grad_norm": 0.908953845500946,
"learning_rate": 9.053511163847694e-06,
"loss": 0.5677,
"step": 3522
},
{
"epoch": 0.22,
"grad_norm": 0.9534192085266113,
"learning_rate": 9.052910401584812e-06,
"loss": 0.6819,
"step": 3523
},
{
"epoch": 0.22,
"grad_norm": 0.8306724429130554,
"learning_rate": 9.052309468667974e-06,
"loss": 0.591,
"step": 3524
},
{
"epoch": 0.22,
"grad_norm": 0.7923970818519592,
"learning_rate": 9.05170836512248e-06,
"loss": 0.5629,
"step": 3525
},
{
"epoch": 0.22,
"grad_norm": 0.9316359162330627,
"learning_rate": 9.051107090973642e-06,
"loss": 0.6095,
"step": 3526
},
{
"epoch": 0.22,
"grad_norm": 0.9419963359832764,
"learning_rate": 9.050505646246777e-06,
"loss": 0.6759,
"step": 3527
},
{
"epoch": 0.22,
"grad_norm": 0.8494296669960022,
"learning_rate": 9.04990403096721e-06,
"loss": 0.582,
"step": 3528
},
{
"epoch": 0.22,
"grad_norm": 0.9111973643302917,
"learning_rate": 9.049302245160273e-06,
"loss": 0.6585,
"step": 3529
},
{
"epoch": 0.22,
"grad_norm": 0.906576931476593,
"learning_rate": 9.048700288851305e-06,
"loss": 0.6302,
"step": 3530
},
{
"epoch": 0.22,
"grad_norm": 0.8784658312797546,
"learning_rate": 9.048098162065652e-06,
"loss": 0.653,
"step": 3531
},
{
"epoch": 0.22,
"grad_norm": 0.8768582344055176,
"learning_rate": 9.047495864828668e-06,
"loss": 0.6133,
"step": 3532
},
{
"epoch": 0.22,
"grad_norm": 0.8660056591033936,
"learning_rate": 9.046893397165713e-06,
"loss": 0.6245,
"step": 3533
},
{
"epoch": 0.22,
"grad_norm": 0.923427939414978,
"learning_rate": 9.046290759102155e-06,
"loss": 0.6139,
"step": 3534
},
{
"epoch": 0.22,
"grad_norm": 0.8469942212104797,
"learning_rate": 9.04568795066337e-06,
"loss": 0.5996,
"step": 3535
},
{
"epoch": 0.22,
"grad_norm": 0.9075682759284973,
"learning_rate": 9.045084971874738e-06,
"loss": 0.6336,
"step": 3536
},
{
"epoch": 0.22,
"grad_norm": 0.9033473134040833,
"learning_rate": 9.044481822761651e-06,
"loss": 0.6277,
"step": 3537
},
{
"epoch": 0.22,
"grad_norm": 0.9756919145584106,
"learning_rate": 9.043878503349503e-06,
"loss": 0.6174,
"step": 3538
},
{
"epoch": 0.22,
"grad_norm": 0.8659248352050781,
"learning_rate": 9.043275013663699e-06,
"loss": 0.5844,
"step": 3539
},
{
"epoch": 0.22,
"grad_norm": 0.9130862355232239,
"learning_rate": 9.04267135372965e-06,
"loss": 0.6517,
"step": 3540
},
{
"epoch": 0.22,
"grad_norm": 0.9586864709854126,
"learning_rate": 9.042067523572775e-06,
"loss": 0.6081,
"step": 3541
},
{
"epoch": 0.22,
"grad_norm": 0.9088827967643738,
"learning_rate": 9.041463523218496e-06,
"loss": 0.6973,
"step": 3542
},
{
"epoch": 0.22,
"grad_norm": 0.9386407136917114,
"learning_rate": 9.040859352692249e-06,
"loss": 0.6771,
"step": 3543
},
{
"epoch": 0.22,
"grad_norm": 0.9016104340553284,
"learning_rate": 9.04025501201947e-06,
"loss": 0.6504,
"step": 3544
},
{
"epoch": 0.22,
"grad_norm": 0.8565789461135864,
"learning_rate": 9.039650501225608e-06,
"loss": 0.6128,
"step": 3545
},
{
"epoch": 0.22,
"grad_norm": 0.8813103437423706,
"learning_rate": 9.039045820336116e-06,
"loss": 0.6283,
"step": 3546
},
{
"epoch": 0.22,
"grad_norm": 0.883348286151886,
"learning_rate": 9.038440969376456e-06,
"loss": 0.6106,
"step": 3547
},
{
"epoch": 0.22,
"grad_norm": 0.8444504737854004,
"learning_rate": 9.037835948372095e-06,
"loss": 0.5763,
"step": 3548
},
{
"epoch": 0.22,
"grad_norm": 0.856566846370697,
"learning_rate": 9.03723075734851e-06,
"loss": 0.601,
"step": 3549
},
{
"epoch": 0.22,
"grad_norm": 0.9262292385101318,
"learning_rate": 9.03662539633118e-06,
"loss": 0.5976,
"step": 3550
},
{
"epoch": 0.22,
"grad_norm": 0.931098461151123,
"learning_rate": 9.0360198653456e-06,
"loss": 0.6391,
"step": 3551
},
{
"epoch": 0.23,
"grad_norm": 0.9265716075897217,
"learning_rate": 9.035414164417262e-06,
"loss": 0.5899,
"step": 3552
},
{
"epoch": 0.23,
"grad_norm": 0.9725390672683716,
"learning_rate": 9.034808293571672e-06,
"loss": 0.6615,
"step": 3553
},
{
"epoch": 0.23,
"grad_norm": 0.9248775839805603,
"learning_rate": 9.03420225283434e-06,
"loss": 0.6472,
"step": 3554
},
{
"epoch": 0.23,
"grad_norm": 0.851396381855011,
"learning_rate": 9.033596042230788e-06,
"loss": 0.5497,
"step": 3555
},
{
"epoch": 0.23,
"grad_norm": 0.9172872304916382,
"learning_rate": 9.032989661786535e-06,
"loss": 0.65,
"step": 3556
},
{
"epoch": 0.23,
"grad_norm": 0.8770195841789246,
"learning_rate": 9.032383111527119e-06,
"loss": 0.6253,
"step": 3557
},
{
"epoch": 0.23,
"grad_norm": 0.9005029201507568,
"learning_rate": 9.031776391478077e-06,
"loss": 0.631,
"step": 3558
},
{
"epoch": 0.23,
"grad_norm": 0.8701792359352112,
"learning_rate": 9.031169501664958e-06,
"loss": 0.6235,
"step": 3559
},
{
"epoch": 0.23,
"grad_norm": 0.9129980206489563,
"learning_rate": 9.030562442113313e-06,
"loss": 0.6273,
"step": 3560
},
{
"epoch": 0.23,
"grad_norm": 0.9068407416343689,
"learning_rate": 9.029955212848706e-06,
"loss": 0.6408,
"step": 3561
},
{
"epoch": 0.23,
"grad_norm": 0.9272667169570923,
"learning_rate": 9.029347813896704e-06,
"loss": 0.5862,
"step": 3562
},
{
"epoch": 0.23,
"grad_norm": 0.8602524399757385,
"learning_rate": 9.028740245282881e-06,
"loss": 0.6004,
"step": 3563
},
{
"epoch": 0.23,
"grad_norm": 0.9108449220657349,
"learning_rate": 9.028132507032823e-06,
"loss": 0.6113,
"step": 3564
},
{
"epoch": 0.23,
"grad_norm": 0.8397127985954285,
"learning_rate": 9.027524599172117e-06,
"loss": 0.601,
"step": 3565
},
{
"epoch": 0.23,
"grad_norm": 0.9540258049964905,
"learning_rate": 9.026916521726361e-06,
"loss": 0.6869,
"step": 3566
},
{
"epoch": 0.23,
"grad_norm": 0.9084812998771667,
"learning_rate": 9.026308274721161e-06,
"loss": 0.5817,
"step": 3567
},
{
"epoch": 0.23,
"grad_norm": 0.9301480650901794,
"learning_rate": 9.025699858182125e-06,
"loss": 0.5917,
"step": 3568
},
{
"epoch": 0.23,
"grad_norm": 0.8542090058326721,
"learning_rate": 9.02509127213487e-06,
"loss": 0.6182,
"step": 3569
},
{
"epoch": 0.23,
"grad_norm": 0.8809559941291809,
"learning_rate": 9.024482516605026e-06,
"loss": 0.5781,
"step": 3570
},
{
"epoch": 0.23,
"grad_norm": 0.9583331346511841,
"learning_rate": 9.023873591618224e-06,
"loss": 0.6249,
"step": 3571
},
{
"epoch": 0.23,
"grad_norm": 0.9110972881317139,
"learning_rate": 9.023264497200102e-06,
"loss": 0.624,
"step": 3572
},
{
"epoch": 0.23,
"grad_norm": 0.876470685005188,
"learning_rate": 9.022655233376308e-06,
"loss": 0.6066,
"step": 3573
},
{
"epoch": 0.23,
"grad_norm": 0.8327741622924805,
"learning_rate": 9.022045800172493e-06,
"loss": 0.592,
"step": 3574
},
{
"epoch": 0.23,
"grad_norm": 0.9016212821006775,
"learning_rate": 9.021436197614326e-06,
"loss": 0.5999,
"step": 3575
},
{
"epoch": 0.23,
"grad_norm": 0.9393583536148071,
"learning_rate": 9.020826425727468e-06,
"loss": 0.6292,
"step": 3576
},
{
"epoch": 0.23,
"grad_norm": 0.8916171193122864,
"learning_rate": 9.020216484537595e-06,
"loss": 0.6681,
"step": 3577
},
{
"epoch": 0.23,
"grad_norm": 0.9707697629928589,
"learning_rate": 9.019606374070394e-06,
"loss": 0.6506,
"step": 3578
},
{
"epoch": 0.23,
"grad_norm": 0.9742267727851868,
"learning_rate": 9.01899609435155e-06,
"loss": 0.6432,
"step": 3579
},
{
"epoch": 0.23,
"grad_norm": 0.9248902201652527,
"learning_rate": 9.018385645406765e-06,
"loss": 0.627,
"step": 3580
},
{
"epoch": 0.23,
"grad_norm": 0.8701397180557251,
"learning_rate": 9.017775027261735e-06,
"loss": 0.6343,
"step": 3581
},
{
"epoch": 0.23,
"grad_norm": 0.8465285897254944,
"learning_rate": 9.017164239942178e-06,
"loss": 0.6101,
"step": 3582
},
{
"epoch": 0.23,
"grad_norm": 0.8863876461982727,
"learning_rate": 9.016553283473808e-06,
"loss": 0.6401,
"step": 3583
},
{
"epoch": 0.23,
"grad_norm": 0.8480295538902283,
"learning_rate": 9.015942157882353e-06,
"loss": 0.5718,
"step": 3584
},
{
"epoch": 0.23,
"grad_norm": 0.8785873055458069,
"learning_rate": 9.015330863193543e-06,
"loss": 0.6074,
"step": 3585
},
{
"epoch": 0.23,
"grad_norm": 0.8759261965751648,
"learning_rate": 9.01471939943312e-06,
"loss": 0.6138,
"step": 3586
},
{
"epoch": 0.23,
"grad_norm": 0.8847134113311768,
"learning_rate": 9.014107766626828e-06,
"loss": 0.5651,
"step": 3587
},
{
"epoch": 0.23,
"grad_norm": 0.8662316203117371,
"learning_rate": 9.013495964800423e-06,
"loss": 0.6643,
"step": 3588
},
{
"epoch": 0.23,
"grad_norm": 0.8825305700302124,
"learning_rate": 9.012883993979663e-06,
"loss": 0.7025,
"step": 3589
},
{
"epoch": 0.23,
"grad_norm": 0.8754686713218689,
"learning_rate": 9.01227185419032e-06,
"loss": 0.6276,
"step": 3590
},
{
"epoch": 0.23,
"grad_norm": 0.9244438409805298,
"learning_rate": 9.011659545458167e-06,
"loss": 0.5912,
"step": 3591
},
{
"epoch": 0.23,
"grad_norm": 0.9278584718704224,
"learning_rate": 9.011047067808985e-06,
"loss": 0.6441,
"step": 3592
},
{
"epoch": 0.23,
"grad_norm": 0.853956401348114,
"learning_rate": 9.010434421268564e-06,
"loss": 0.5881,
"step": 3593
},
{
"epoch": 0.23,
"grad_norm": 0.903804361820221,
"learning_rate": 9.009821605862701e-06,
"loss": 0.6268,
"step": 3594
},
{
"epoch": 0.23,
"grad_norm": 0.884956955909729,
"learning_rate": 9.0092086216172e-06,
"loss": 0.5976,
"step": 3595
},
{
"epoch": 0.23,
"grad_norm": 0.8600631952285767,
"learning_rate": 9.00859546855787e-06,
"loss": 0.5976,
"step": 3596
},
{
"epoch": 0.23,
"grad_norm": 0.8109932541847229,
"learning_rate": 9.007982146710533e-06,
"loss": 0.5807,
"step": 3597
},
{
"epoch": 0.23,
"grad_norm": 0.8790200352668762,
"learning_rate": 9.007368656101006e-06,
"loss": 0.6335,
"step": 3598
},
{
"epoch": 0.23,
"grad_norm": 0.8840540647506714,
"learning_rate": 9.006754996755129e-06,
"loss": 0.5932,
"step": 3599
},
{
"epoch": 0.23,
"grad_norm": 0.9371446967124939,
"learning_rate": 9.006141168698735e-06,
"loss": 0.6723,
"step": 3600
},
{
"epoch": 0.23,
"grad_norm": 0.8178922533988953,
"learning_rate": 9.005527171957676e-06,
"loss": 0.5882,
"step": 3601
},
{
"epoch": 0.23,
"grad_norm": 0.9067853093147278,
"learning_rate": 9.004913006557798e-06,
"loss": 0.6432,
"step": 3602
},
{
"epoch": 0.23,
"grad_norm": 0.8906139135360718,
"learning_rate": 9.004298672524967e-06,
"loss": 0.6492,
"step": 3603
},
{
"epoch": 0.23,
"grad_norm": 0.8456130623817444,
"learning_rate": 9.003684169885049e-06,
"loss": 0.6127,
"step": 3604
},
{
"epoch": 0.23,
"grad_norm": 0.8719025254249573,
"learning_rate": 9.00306949866392e-06,
"loss": 0.6,
"step": 3605
},
{
"epoch": 0.23,
"grad_norm": 0.8929893970489502,
"learning_rate": 9.002454658887458e-06,
"loss": 0.5915,
"step": 3606
},
{
"epoch": 0.23,
"grad_norm": 0.9277382493019104,
"learning_rate": 9.001839650581554e-06,
"loss": 0.6316,
"step": 3607
},
{
"epoch": 0.23,
"grad_norm": 0.9326600432395935,
"learning_rate": 9.001224473772104e-06,
"loss": 0.6662,
"step": 3608
},
{
"epoch": 0.23,
"grad_norm": 0.8711685538291931,
"learning_rate": 9.000609128485011e-06,
"loss": 0.5557,
"step": 3609
},
{
"epoch": 0.23,
"grad_norm": 0.9938933849334717,
"learning_rate": 8.999993614746184e-06,
"loss": 0.6923,
"step": 3610
},
{
"epoch": 0.23,
"grad_norm": 0.8392737507820129,
"learning_rate": 8.999377932581541e-06,
"loss": 0.5789,
"step": 3611
},
{
"epoch": 0.23,
"grad_norm": 0.9159629344940186,
"learning_rate": 8.998762082017006e-06,
"loss": 0.6179,
"step": 3612
},
{
"epoch": 0.23,
"grad_norm": 0.9216225743293762,
"learning_rate": 8.998146063078512e-06,
"loss": 0.6627,
"step": 3613
},
{
"epoch": 0.23,
"grad_norm": 0.8778311610221863,
"learning_rate": 8.997529875791993e-06,
"loss": 0.6039,
"step": 3614
},
{
"epoch": 0.23,
"grad_norm": 0.9303637742996216,
"learning_rate": 8.9969135201834e-06,
"loss": 0.6187,
"step": 3615
},
{
"epoch": 0.23,
"grad_norm": 0.9529017806053162,
"learning_rate": 8.996296996278682e-06,
"loss": 0.6698,
"step": 3616
},
{
"epoch": 0.23,
"grad_norm": 0.8703224658966064,
"learning_rate": 8.9956803041038e-06,
"loss": 0.6047,
"step": 3617
},
{
"epoch": 0.23,
"grad_norm": 0.8795974254608154,
"learning_rate": 8.99506344368472e-06,
"loss": 0.5653,
"step": 3618
},
{
"epoch": 0.23,
"grad_norm": 0.8557493090629578,
"learning_rate": 8.994446415047415e-06,
"loss": 0.5735,
"step": 3619
},
{
"epoch": 0.23,
"grad_norm": 0.8863241672515869,
"learning_rate": 8.993829218217867e-06,
"loss": 0.6194,
"step": 3620
},
{
"epoch": 0.23,
"grad_norm": 0.8855205178260803,
"learning_rate": 8.993211853222065e-06,
"loss": 0.6443,
"step": 3621
},
{
"epoch": 0.23,
"grad_norm": 0.9319906830787659,
"learning_rate": 8.992594320086005e-06,
"loss": 0.6551,
"step": 3622
},
{
"epoch": 0.23,
"grad_norm": 0.863646924495697,
"learning_rate": 8.991976618835685e-06,
"loss": 0.6152,
"step": 3623
},
{
"epoch": 0.23,
"grad_norm": 0.9434888362884521,
"learning_rate": 8.991358749497117e-06,
"loss": 0.6381,
"step": 3624
},
{
"epoch": 0.23,
"grad_norm": 0.9003688097000122,
"learning_rate": 8.990740712096317e-06,
"loss": 0.6295,
"step": 3625
},
{
"epoch": 0.23,
"grad_norm": 0.8399546146392822,
"learning_rate": 8.99012250665931e-06,
"loss": 0.5874,
"step": 3626
},
{
"epoch": 0.23,
"grad_norm": 0.9593385457992554,
"learning_rate": 8.989504133212123e-06,
"loss": 0.7235,
"step": 3627
},
{
"epoch": 0.23,
"grad_norm": 0.8997763991355896,
"learning_rate": 8.988885591780795e-06,
"loss": 0.6178,
"step": 3628
},
{
"epoch": 0.23,
"grad_norm": 0.888486385345459,
"learning_rate": 8.988266882391374e-06,
"loss": 0.6022,
"step": 3629
},
{
"epoch": 0.23,
"grad_norm": 0.8956373929977417,
"learning_rate": 8.987648005069907e-06,
"loss": 0.6401,
"step": 3630
},
{
"epoch": 0.23,
"grad_norm": 0.9338024854660034,
"learning_rate": 8.987028959842454e-06,
"loss": 0.6308,
"step": 3631
},
{
"epoch": 0.23,
"grad_norm": 0.9498031139373779,
"learning_rate": 8.986409746735084e-06,
"loss": 0.5903,
"step": 3632
},
{
"epoch": 0.23,
"grad_norm": 0.9222273826599121,
"learning_rate": 8.985790365773864e-06,
"loss": 0.6238,
"step": 3633
},
{
"epoch": 0.23,
"grad_norm": 0.8916066884994507,
"learning_rate": 8.985170816984878e-06,
"loss": 0.5938,
"step": 3634
},
{
"epoch": 0.23,
"grad_norm": 0.901877760887146,
"learning_rate": 8.984551100394212e-06,
"loss": 0.6308,
"step": 3635
},
{
"epoch": 0.23,
"grad_norm": 0.9297860264778137,
"learning_rate": 8.98393121602796e-06,
"loss": 0.6093,
"step": 3636
},
{
"epoch": 0.23,
"grad_norm": 0.9041366577148438,
"learning_rate": 8.983311163912227e-06,
"loss": 0.5895,
"step": 3637
},
{
"epoch": 0.23,
"grad_norm": 0.9007093906402588,
"learning_rate": 8.982690944073113e-06,
"loss": 0.5988,
"step": 3638
},
{
"epoch": 0.23,
"grad_norm": 0.8943149447441101,
"learning_rate": 8.982070556536741e-06,
"loss": 0.6197,
"step": 3639
},
{
"epoch": 0.23,
"grad_norm": 1.2114888429641724,
"learning_rate": 8.98145000132923e-06,
"loss": 0.639,
"step": 3640
},
{
"epoch": 0.23,
"grad_norm": 0.8697226047515869,
"learning_rate": 8.980829278476711e-06,
"loss": 0.612,
"step": 3641
},
{
"epoch": 0.23,
"grad_norm": 0.9321666359901428,
"learning_rate": 8.980208388005318e-06,
"loss": 0.6106,
"step": 3642
},
{
"epoch": 0.23,
"grad_norm": 0.8679060935974121,
"learning_rate": 8.979587329941197e-06,
"loss": 0.635,
"step": 3643
},
{
"epoch": 0.23,
"grad_norm": 0.8968499898910522,
"learning_rate": 8.978966104310497e-06,
"loss": 0.6707,
"step": 3644
},
{
"epoch": 0.23,
"grad_norm": 0.8723733425140381,
"learning_rate": 8.978344711139374e-06,
"loss": 0.5988,
"step": 3645
},
{
"epoch": 0.23,
"grad_norm": 0.9933862090110779,
"learning_rate": 8.977723150453999e-06,
"loss": 0.6475,
"step": 3646
},
{
"epoch": 0.23,
"grad_norm": 0.9089076519012451,
"learning_rate": 8.977101422280536e-06,
"loss": 0.6124,
"step": 3647
},
{
"epoch": 0.23,
"grad_norm": 0.8408138155937195,
"learning_rate": 8.97647952664517e-06,
"loss": 0.5773,
"step": 3648
},
{
"epoch": 0.23,
"grad_norm": 0.8604863882064819,
"learning_rate": 8.975857463574082e-06,
"loss": 0.5365,
"step": 3649
},
{
"epoch": 0.23,
"grad_norm": 0.8891851902008057,
"learning_rate": 8.97523523309347e-06,
"loss": 0.5713,
"step": 3650
},
{
"epoch": 0.23,
"grad_norm": 0.8845424652099609,
"learning_rate": 8.974612835229528e-06,
"loss": 0.6176,
"step": 3651
},
{
"epoch": 0.23,
"grad_norm": 0.8516875505447388,
"learning_rate": 8.973990270008467e-06,
"loss": 0.6297,
"step": 3652
},
{
"epoch": 0.23,
"grad_norm": 0.8722536563873291,
"learning_rate": 8.973367537456502e-06,
"loss": 0.6413,
"step": 3653
},
{
"epoch": 0.23,
"grad_norm": 0.9590871334075928,
"learning_rate": 8.97274463759985e-06,
"loss": 0.6862,
"step": 3654
},
{
"epoch": 0.23,
"grad_norm": 0.952622652053833,
"learning_rate": 8.972121570464744e-06,
"loss": 0.6474,
"step": 3655
},
{
"epoch": 0.23,
"grad_norm": 0.8545388579368591,
"learning_rate": 8.971498336077415e-06,
"loss": 0.6003,
"step": 3656
},
{
"epoch": 0.23,
"grad_norm": 0.9351946115493774,
"learning_rate": 8.970874934464108e-06,
"loss": 0.6054,
"step": 3657
},
{
"epoch": 0.23,
"grad_norm": 0.8703538179397583,
"learning_rate": 8.970251365651071e-06,
"loss": 0.6466,
"step": 3658
},
{
"epoch": 0.23,
"grad_norm": 0.908089816570282,
"learning_rate": 8.969627629664559e-06,
"loss": 0.5536,
"step": 3659
},
{
"epoch": 0.23,
"grad_norm": 0.8776571154594421,
"learning_rate": 8.969003726530838e-06,
"loss": 0.6136,
"step": 3660
},
{
"epoch": 0.23,
"grad_norm": 0.901607096195221,
"learning_rate": 8.968379656276177e-06,
"loss": 0.6526,
"step": 3661
},
{
"epoch": 0.23,
"grad_norm": 0.9027665853500366,
"learning_rate": 8.967755418926854e-06,
"loss": 0.6117,
"step": 3662
},
{
"epoch": 0.23,
"grad_norm": 0.8891413807868958,
"learning_rate": 8.967131014509152e-06,
"loss": 0.586,
"step": 3663
},
{
"epoch": 0.23,
"grad_norm": 0.9754297137260437,
"learning_rate": 8.966506443049366e-06,
"loss": 0.6608,
"step": 3664
},
{
"epoch": 0.23,
"grad_norm": 0.9024408459663391,
"learning_rate": 8.965881704573789e-06,
"loss": 0.6286,
"step": 3665
},
{
"epoch": 0.23,
"grad_norm": 1.0116610527038574,
"learning_rate": 8.965256799108733e-06,
"loss": 0.6086,
"step": 3666
},
{
"epoch": 0.23,
"grad_norm": 0.9310095310211182,
"learning_rate": 8.964631726680504e-06,
"loss": 0.6619,
"step": 3667
},
{
"epoch": 0.23,
"grad_norm": 0.9279587268829346,
"learning_rate": 8.964006487315426e-06,
"loss": 0.6823,
"step": 3668
},
{
"epoch": 0.23,
"grad_norm": 0.8194244503974915,
"learning_rate": 8.963381081039826e-06,
"loss": 0.6129,
"step": 3669
},
{
"epoch": 0.23,
"grad_norm": 0.8662837147712708,
"learning_rate": 8.962755507880036e-06,
"loss": 0.6013,
"step": 3670
},
{
"epoch": 0.23,
"grad_norm": 0.8568682074546814,
"learning_rate": 8.962129767862395e-06,
"loss": 0.6053,
"step": 3671
},
{
"epoch": 0.23,
"grad_norm": 0.9012435078620911,
"learning_rate": 8.961503861013255e-06,
"loss": 0.6542,
"step": 3672
},
{
"epoch": 0.23,
"grad_norm": 0.8900840282440186,
"learning_rate": 8.960877787358968e-06,
"loss": 0.6021,
"step": 3673
},
{
"epoch": 0.23,
"grad_norm": 0.9452951550483704,
"learning_rate": 8.960251546925895e-06,
"loss": 0.6342,
"step": 3674
},
{
"epoch": 0.23,
"grad_norm": 0.7982982397079468,
"learning_rate": 8.959625139740407e-06,
"loss": 0.5353,
"step": 3675
},
{
"epoch": 0.23,
"grad_norm": 0.9291501045227051,
"learning_rate": 8.95899856582888e-06,
"loss": 0.6417,
"step": 3676
},
{
"epoch": 0.23,
"grad_norm": 0.8522927761077881,
"learning_rate": 8.958371825217693e-06,
"loss": 0.5758,
"step": 3677
},
{
"epoch": 0.23,
"grad_norm": 0.8960750699043274,
"learning_rate": 8.957744917933241e-06,
"loss": 0.5945,
"step": 3678
},
{
"epoch": 0.23,
"grad_norm": 0.8411138653755188,
"learning_rate": 8.957117844001919e-06,
"loss": 0.6068,
"step": 3679
},
{
"epoch": 0.23,
"grad_norm": 0.9141689538955688,
"learning_rate": 8.956490603450128e-06,
"loss": 0.6117,
"step": 3680
},
{
"epoch": 0.23,
"grad_norm": 0.9008049964904785,
"learning_rate": 8.955863196304282e-06,
"loss": 0.6095,
"step": 3681
},
{
"epoch": 0.23,
"grad_norm": 0.9140220284461975,
"learning_rate": 8.9552356225908e-06,
"loss": 0.5998,
"step": 3682
},
{
"epoch": 0.23,
"grad_norm": 0.8372965455055237,
"learning_rate": 8.954607882336105e-06,
"loss": 0.5772,
"step": 3683
},
{
"epoch": 0.23,
"grad_norm": 0.9582294225692749,
"learning_rate": 8.953979975566626e-06,
"loss": 0.6542,
"step": 3684
},
{
"epoch": 0.23,
"grad_norm": 0.9331498146057129,
"learning_rate": 8.953351902308807e-06,
"loss": 0.6334,
"step": 3685
},
{
"epoch": 0.23,
"grad_norm": 0.9214125871658325,
"learning_rate": 8.952723662589093e-06,
"loss": 0.6551,
"step": 3686
},
{
"epoch": 0.23,
"grad_norm": 0.9166949987411499,
"learning_rate": 8.952095256433934e-06,
"loss": 0.6552,
"step": 3687
},
{
"epoch": 0.23,
"grad_norm": 0.9024720191955566,
"learning_rate": 8.951466683869795e-06,
"loss": 0.5499,
"step": 3688
},
{
"epoch": 0.23,
"grad_norm": 0.8689588308334351,
"learning_rate": 8.950837944923138e-06,
"loss": 0.5789,
"step": 3689
},
{
"epoch": 0.23,
"grad_norm": 0.9102311134338379,
"learning_rate": 8.95020903962044e-06,
"loss": 0.6514,
"step": 3690
},
{
"epoch": 0.23,
"grad_norm": 0.9357526302337646,
"learning_rate": 8.94957996798818e-06,
"loss": 0.608,
"step": 3691
},
{
"epoch": 0.23,
"grad_norm": 0.8915140628814697,
"learning_rate": 8.948950730052847e-06,
"loss": 0.6221,
"step": 3692
},
{
"epoch": 0.23,
"grad_norm": 0.9464169144630432,
"learning_rate": 8.948321325840937e-06,
"loss": 0.6701,
"step": 3693
},
{
"epoch": 0.23,
"grad_norm": 0.9279240965843201,
"learning_rate": 8.94769175537895e-06,
"loss": 0.6052,
"step": 3694
},
{
"epoch": 0.23,
"grad_norm": 0.9310309886932373,
"learning_rate": 8.9470620186934e-06,
"loss": 0.6523,
"step": 3695
},
{
"epoch": 0.23,
"grad_norm": 0.930351972579956,
"learning_rate": 8.946432115810795e-06,
"loss": 0.6639,
"step": 3696
},
{
"epoch": 0.23,
"grad_norm": 0.9379802346229553,
"learning_rate": 8.945802046757666e-06,
"loss": 0.6714,
"step": 3697
},
{
"epoch": 0.23,
"grad_norm": 0.903059184551239,
"learning_rate": 8.945171811560535e-06,
"loss": 0.6284,
"step": 3698
},
{
"epoch": 0.23,
"grad_norm": 0.9489940404891968,
"learning_rate": 8.944541410245947e-06,
"loss": 0.5875,
"step": 3699
},
{
"epoch": 0.23,
"grad_norm": 0.9724168181419373,
"learning_rate": 8.943910842840439e-06,
"loss": 0.6435,
"step": 3700
},
{
"epoch": 0.23,
"grad_norm": 0.9340975284576416,
"learning_rate": 8.943280109370568e-06,
"loss": 0.6209,
"step": 3701
},
{
"epoch": 0.23,
"grad_norm": 0.8607521653175354,
"learning_rate": 8.942649209862888e-06,
"loss": 0.5788,
"step": 3702
},
{
"epoch": 0.23,
"grad_norm": 0.8896112442016602,
"learning_rate": 8.942018144343965e-06,
"loss": 0.6177,
"step": 3703
},
{
"epoch": 0.23,
"grad_norm": 0.9297407865524292,
"learning_rate": 8.941386912840372e-06,
"loss": 0.6398,
"step": 3704
},
{
"epoch": 0.23,
"grad_norm": 0.8317979574203491,
"learning_rate": 8.940755515378687e-06,
"loss": 0.6036,
"step": 3705
},
{
"epoch": 0.23,
"grad_norm": 0.9319295287132263,
"learning_rate": 8.940123951985495e-06,
"loss": 0.608,
"step": 3706
},
{
"epoch": 0.23,
"grad_norm": 0.9755576252937317,
"learning_rate": 8.939492222687392e-06,
"loss": 0.6238,
"step": 3707
},
{
"epoch": 0.23,
"grad_norm": 0.8916385173797607,
"learning_rate": 8.938860327510975e-06,
"loss": 0.614,
"step": 3708
},
{
"epoch": 0.23,
"grad_norm": 0.8873549699783325,
"learning_rate": 8.938228266482852e-06,
"loss": 0.6389,
"step": 3709
},
{
"epoch": 0.24,
"grad_norm": 0.8616818785667419,
"learning_rate": 8.937596039629637e-06,
"loss": 0.6028,
"step": 3710
},
{
"epoch": 0.24,
"grad_norm": 0.8916230797767639,
"learning_rate": 8.93696364697795e-06,
"loss": 0.6395,
"step": 3711
},
{
"epoch": 0.24,
"grad_norm": 0.8822511434555054,
"learning_rate": 8.936331088554419e-06,
"loss": 0.5956,
"step": 3712
},
{
"epoch": 0.24,
"grad_norm": 0.8785961270332336,
"learning_rate": 8.93569836438568e-06,
"loss": 0.5859,
"step": 3713
},
{
"epoch": 0.24,
"grad_norm": 0.9163837432861328,
"learning_rate": 8.935065474498375e-06,
"loss": 0.6075,
"step": 3714
},
{
"epoch": 0.24,
"grad_norm": 0.8735101819038391,
"learning_rate": 8.934432418919153e-06,
"loss": 0.6399,
"step": 3715
},
{
"epoch": 0.24,
"grad_norm": 0.877932608127594,
"learning_rate": 8.933799197674667e-06,
"loss": 0.6058,
"step": 3716
},
{
"epoch": 0.24,
"grad_norm": 0.9489808082580566,
"learning_rate": 8.933165810791579e-06,
"loss": 0.6173,
"step": 3717
},
{
"epoch": 0.24,
"grad_norm": 0.8636232018470764,
"learning_rate": 8.932532258296565e-06,
"loss": 0.6418,
"step": 3718
},
{
"epoch": 0.24,
"grad_norm": 0.9418687224388123,
"learning_rate": 8.931898540216297e-06,
"loss": 0.6438,
"step": 3719
},
{
"epoch": 0.24,
"grad_norm": 0.9097021222114563,
"learning_rate": 8.931264656577459e-06,
"loss": 0.6215,
"step": 3720
},
{
"epoch": 0.24,
"grad_norm": 0.8493873476982117,
"learning_rate": 8.930630607406743e-06,
"loss": 0.6228,
"step": 3721
},
{
"epoch": 0.24,
"grad_norm": 0.9140156507492065,
"learning_rate": 8.929996392730844e-06,
"loss": 0.6362,
"step": 3722
},
{
"epoch": 0.24,
"grad_norm": 0.8999550938606262,
"learning_rate": 8.92936201257647e-06,
"loss": 0.6398,
"step": 3723
},
{
"epoch": 0.24,
"grad_norm": 0.8380311727523804,
"learning_rate": 8.928727466970331e-06,
"loss": 0.588,
"step": 3724
},
{
"epoch": 0.24,
"grad_norm": 0.9263492822647095,
"learning_rate": 8.928092755939145e-06,
"loss": 0.6247,
"step": 3725
},
{
"epoch": 0.24,
"grad_norm": 0.9678030014038086,
"learning_rate": 8.927457879509638e-06,
"loss": 0.624,
"step": 3726
},
{
"epoch": 0.24,
"grad_norm": 0.8912070989608765,
"learning_rate": 8.926822837708542e-06,
"loss": 0.6393,
"step": 3727
},
{
"epoch": 0.24,
"grad_norm": 0.844551682472229,
"learning_rate": 8.926187630562597e-06,
"loss": 0.6139,
"step": 3728
},
{
"epoch": 0.24,
"grad_norm": 0.9056801199913025,
"learning_rate": 8.925552258098549e-06,
"loss": 0.5725,
"step": 3729
},
{
"epoch": 0.24,
"grad_norm": 0.8850533962249756,
"learning_rate": 8.924916720343151e-06,
"loss": 0.6235,
"step": 3730
},
{
"epoch": 0.24,
"grad_norm": 0.858784556388855,
"learning_rate": 8.924281017323164e-06,
"loss": 0.59,
"step": 3731
},
{
"epoch": 0.24,
"grad_norm": 0.8923681974411011,
"learning_rate": 8.923645149065354e-06,
"loss": 0.5841,
"step": 3732
},
{
"epoch": 0.24,
"grad_norm": 0.9098735451698303,
"learning_rate": 8.923009115596498e-06,
"loss": 0.5895,
"step": 3733
},
{
"epoch": 0.24,
"grad_norm": 0.8857651352882385,
"learning_rate": 8.922372916943374e-06,
"loss": 0.6612,
"step": 3734
},
{
"epoch": 0.24,
"grad_norm": 0.9229490756988525,
"learning_rate": 8.921736553132772e-06,
"loss": 0.6304,
"step": 3735
},
{
"epoch": 0.24,
"grad_norm": 0.8978235125541687,
"learning_rate": 8.921100024191486e-06,
"loss": 0.5965,
"step": 3736
},
{
"epoch": 0.24,
"grad_norm": 0.8856748938560486,
"learning_rate": 8.920463330146318e-06,
"loss": 0.6114,
"step": 3737
},
{
"epoch": 0.24,
"grad_norm": 0.9307460784912109,
"learning_rate": 8.919826471024078e-06,
"loss": 0.6278,
"step": 3738
},
{
"epoch": 0.24,
"grad_norm": 0.9287357926368713,
"learning_rate": 8.919189446851583e-06,
"loss": 0.5925,
"step": 3739
},
{
"epoch": 0.24,
"grad_norm": 0.9358810782432556,
"learning_rate": 8.918552257655652e-06,
"loss": 0.608,
"step": 3740
},
{
"epoch": 0.24,
"grad_norm": 0.9406039714813232,
"learning_rate": 8.917914903463119e-06,
"loss": 0.6813,
"step": 3741
},
{
"epoch": 0.24,
"grad_norm": 0.9355833530426025,
"learning_rate": 8.917277384300817e-06,
"loss": 0.6438,
"step": 3742
},
{
"epoch": 0.24,
"grad_norm": 0.8952451944351196,
"learning_rate": 8.916639700195593e-06,
"loss": 0.5932,
"step": 3743
},
{
"epoch": 0.24,
"grad_norm": 0.8967479467391968,
"learning_rate": 8.916001851174296e-06,
"loss": 0.6132,
"step": 3744
},
{
"epoch": 0.24,
"grad_norm": 0.9279077053070068,
"learning_rate": 8.915363837263782e-06,
"loss": 0.6351,
"step": 3745
},
{
"epoch": 0.24,
"grad_norm": 0.8428364396095276,
"learning_rate": 8.91472565849092e-06,
"loss": 0.5932,
"step": 3746
},
{
"epoch": 0.24,
"grad_norm": 0.892693817615509,
"learning_rate": 8.914087314882578e-06,
"loss": 0.585,
"step": 3747
},
{
"epoch": 0.24,
"grad_norm": 0.8900630474090576,
"learning_rate": 8.913448806465634e-06,
"loss": 0.6317,
"step": 3748
},
{
"epoch": 0.24,
"grad_norm": 0.8545112013816833,
"learning_rate": 8.912810133266976e-06,
"loss": 0.5925,
"step": 3749
},
{
"epoch": 0.24,
"grad_norm": 0.9142085313796997,
"learning_rate": 8.912171295313493e-06,
"loss": 0.5952,
"step": 3750
},
{
"epoch": 0.24,
"grad_norm": 0.8664583563804626,
"learning_rate": 8.911532292632089e-06,
"loss": 0.6369,
"step": 3751
},
{
"epoch": 0.24,
"grad_norm": 0.8957768678665161,
"learning_rate": 8.910893125249666e-06,
"loss": 0.6155,
"step": 3752
},
{
"epoch": 0.24,
"grad_norm": 0.9018309116363525,
"learning_rate": 8.91025379319314e-06,
"loss": 0.6574,
"step": 3753
},
{
"epoch": 0.24,
"grad_norm": 0.8896942734718323,
"learning_rate": 8.909614296489428e-06,
"loss": 0.5785,
"step": 3754
},
{
"epoch": 0.24,
"grad_norm": 0.9446683526039124,
"learning_rate": 8.908974635165458e-06,
"loss": 0.638,
"step": 3755
},
{
"epoch": 0.24,
"grad_norm": 0.8614102602005005,
"learning_rate": 8.908334809248165e-06,
"loss": 0.5967,
"step": 3756
},
{
"epoch": 0.24,
"grad_norm": 0.9426348805427551,
"learning_rate": 8.90769481876449e-06,
"loss": 0.6738,
"step": 3757
},
{
"epoch": 0.24,
"grad_norm": 0.9114719033241272,
"learning_rate": 8.90705466374138e-06,
"loss": 0.6332,
"step": 3758
},
{
"epoch": 0.24,
"grad_norm": 0.8968010544776917,
"learning_rate": 8.906414344205789e-06,
"loss": 0.6338,
"step": 3759
},
{
"epoch": 0.24,
"grad_norm": 0.8845388293266296,
"learning_rate": 8.905773860184679e-06,
"loss": 0.6355,
"step": 3760
},
{
"epoch": 0.24,
"grad_norm": 0.8717195391654968,
"learning_rate": 8.905133211705019e-06,
"loss": 0.593,
"step": 3761
},
{
"epoch": 0.24,
"grad_norm": 0.8622083067893982,
"learning_rate": 8.904492398793785e-06,
"loss": 0.632,
"step": 3762
},
{
"epoch": 0.24,
"grad_norm": 0.9208856225013733,
"learning_rate": 8.903851421477959e-06,
"loss": 0.6135,
"step": 3763
},
{
"epoch": 0.24,
"grad_norm": 0.8842298984527588,
"learning_rate": 8.90321027978453e-06,
"loss": 0.6295,
"step": 3764
},
{
"epoch": 0.24,
"grad_norm": 0.9459641575813293,
"learning_rate": 8.902568973740495e-06,
"loss": 0.5951,
"step": 3765
},
{
"epoch": 0.24,
"grad_norm": 0.9696717858314514,
"learning_rate": 8.901927503372855e-06,
"loss": 0.5996,
"step": 3766
},
{
"epoch": 0.24,
"grad_norm": 0.8983449935913086,
"learning_rate": 8.901285868708622e-06,
"loss": 0.6206,
"step": 3767
},
{
"epoch": 0.24,
"grad_norm": 0.8596554398536682,
"learning_rate": 8.900644069774815e-06,
"loss": 0.5802,
"step": 3768
},
{
"epoch": 0.24,
"grad_norm": 0.912438690662384,
"learning_rate": 8.900002106598453e-06,
"loss": 0.6058,
"step": 3769
},
{
"epoch": 0.24,
"grad_norm": 0.931678056716919,
"learning_rate": 8.89935997920657e-06,
"loss": 0.6331,
"step": 3770
},
{
"epoch": 0.24,
"grad_norm": 1.06976318359375,
"learning_rate": 8.898717687626203e-06,
"loss": 0.6401,
"step": 3771
},
{
"epoch": 0.24,
"grad_norm": 0.9052533507347107,
"learning_rate": 8.898075231884397e-06,
"loss": 0.6218,
"step": 3772
},
{
"epoch": 0.24,
"grad_norm": 0.8735697269439697,
"learning_rate": 8.897432612008206e-06,
"loss": 0.633,
"step": 3773
},
{
"epoch": 0.24,
"grad_norm": 0.8962618112564087,
"learning_rate": 8.896789828024682e-06,
"loss": 0.6216,
"step": 3774
},
{
"epoch": 0.24,
"grad_norm": 0.8915939927101135,
"learning_rate": 8.896146879960896e-06,
"loss": 0.6651,
"step": 3775
},
{
"epoch": 0.24,
"grad_norm": 0.877487063407898,
"learning_rate": 8.895503767843918e-06,
"loss": 0.6433,
"step": 3776
},
{
"epoch": 0.24,
"grad_norm": 0.9209939241409302,
"learning_rate": 8.89486049170083e-06,
"loss": 0.6573,
"step": 3777
},
{
"epoch": 0.24,
"grad_norm": 0.8641723990440369,
"learning_rate": 8.894217051558713e-06,
"loss": 0.596,
"step": 3778
},
{
"epoch": 0.24,
"grad_norm": 0.8952119946479797,
"learning_rate": 8.893573447444663e-06,
"loss": 0.6429,
"step": 3779
},
{
"epoch": 0.24,
"grad_norm": 0.8626795411109924,
"learning_rate": 8.892929679385783e-06,
"loss": 0.5635,
"step": 3780
},
{
"epoch": 0.24,
"grad_norm": 0.9164071679115295,
"learning_rate": 8.892285747409172e-06,
"loss": 0.5775,
"step": 3781
},
{
"epoch": 0.24,
"grad_norm": 0.8823123574256897,
"learning_rate": 8.891641651541953e-06,
"loss": 0.5881,
"step": 3782
},
{
"epoch": 0.24,
"grad_norm": 0.9153462648391724,
"learning_rate": 8.89099739181124e-06,
"loss": 0.5915,
"step": 3783
},
{
"epoch": 0.24,
"grad_norm": 0.9311332106590271,
"learning_rate": 8.890352968244162e-06,
"loss": 0.601,
"step": 3784
},
{
"epoch": 0.24,
"grad_norm": 0.9409120678901672,
"learning_rate": 8.889708380867856e-06,
"loss": 0.6608,
"step": 3785
},
{
"epoch": 0.24,
"grad_norm": 0.8671489357948303,
"learning_rate": 8.88906362970946e-06,
"loss": 0.5921,
"step": 3786
},
{
"epoch": 0.24,
"grad_norm": 0.8596804141998291,
"learning_rate": 8.888418714796124e-06,
"loss": 0.6256,
"step": 3787
},
{
"epoch": 0.24,
"grad_norm": 0.8811514377593994,
"learning_rate": 8.887773636155002e-06,
"loss": 0.5861,
"step": 3788
},
{
"epoch": 0.24,
"grad_norm": 0.900944709777832,
"learning_rate": 8.887128393813257e-06,
"loss": 0.641,
"step": 3789
},
{
"epoch": 0.24,
"grad_norm": 0.915507435798645,
"learning_rate": 8.886482987798059e-06,
"loss": 0.6348,
"step": 3790
},
{
"epoch": 0.24,
"grad_norm": 1.0151876211166382,
"learning_rate": 8.885837418136581e-06,
"loss": 0.5955,
"step": 3791
},
{
"epoch": 0.24,
"grad_norm": 0.8506528735160828,
"learning_rate": 8.885191684856007e-06,
"loss": 0.5497,
"step": 3792
},
{
"epoch": 0.24,
"grad_norm": 0.8645548224449158,
"learning_rate": 8.884545787983528e-06,
"loss": 0.6036,
"step": 3793
},
{
"epoch": 0.24,
"grad_norm": 0.8505982160568237,
"learning_rate": 8.88389972754634e-06,
"loss": 0.6379,
"step": 3794
},
{
"epoch": 0.24,
"grad_norm": 0.9695981740951538,
"learning_rate": 8.883253503571643e-06,
"loss": 0.632,
"step": 3795
},
{
"epoch": 0.24,
"grad_norm": 0.9560012817382812,
"learning_rate": 8.882607116086651e-06,
"loss": 0.6135,
"step": 3796
},
{
"epoch": 0.24,
"grad_norm": 0.8482503890991211,
"learning_rate": 8.881960565118581e-06,
"loss": 0.538,
"step": 3797
},
{
"epoch": 0.24,
"grad_norm": 0.9212302565574646,
"learning_rate": 8.881313850694653e-06,
"loss": 0.6599,
"step": 3798
},
{
"epoch": 0.24,
"grad_norm": 0.9493160843849182,
"learning_rate": 8.880666972842105e-06,
"loss": 0.6263,
"step": 3799
},
{
"epoch": 0.24,
"grad_norm": 0.8596429824829102,
"learning_rate": 8.880019931588167e-06,
"loss": 0.6504,
"step": 3800
},
{
"epoch": 0.24,
"grad_norm": 0.929779589176178,
"learning_rate": 8.87937272696009e-06,
"loss": 0.6391,
"step": 3801
},
{
"epoch": 0.24,
"grad_norm": 0.8671481013298035,
"learning_rate": 8.878725358985121e-06,
"loss": 0.5667,
"step": 3802
},
{
"epoch": 0.24,
"grad_norm": 0.9427719116210938,
"learning_rate": 8.87807782769052e-06,
"loss": 0.6795,
"step": 3803
},
{
"epoch": 0.24,
"grad_norm": 0.9037208557128906,
"learning_rate": 8.877430133103555e-06,
"loss": 0.6183,
"step": 3804
},
{
"epoch": 0.24,
"grad_norm": 0.8492844104766846,
"learning_rate": 8.876782275251491e-06,
"loss": 0.6273,
"step": 3805
},
{
"epoch": 0.24,
"grad_norm": 0.8471344113349915,
"learning_rate": 8.876134254161617e-06,
"loss": 0.6053,
"step": 3806
},
{
"epoch": 0.24,
"grad_norm": 0.8713465332984924,
"learning_rate": 8.87548606986121e-06,
"loss": 0.6451,
"step": 3807
},
{
"epoch": 0.24,
"grad_norm": 0.9716042280197144,
"learning_rate": 8.874837722377568e-06,
"loss": 0.5792,
"step": 3808
},
{
"epoch": 0.24,
"grad_norm": 0.8822860717773438,
"learning_rate": 8.87418921173799e-06,
"loss": 0.6024,
"step": 3809
},
{
"epoch": 0.24,
"grad_norm": 0.8905455470085144,
"learning_rate": 8.87354053796978e-06,
"loss": 0.5976,
"step": 3810
},
{
"epoch": 0.24,
"grad_norm": 0.807611346244812,
"learning_rate": 8.872891701100253e-06,
"loss": 0.6114,
"step": 3811
},
{
"epoch": 0.24,
"grad_norm": 0.9287991523742676,
"learning_rate": 8.872242701156731e-06,
"loss": 0.6195,
"step": 3812
},
{
"epoch": 0.24,
"grad_norm": 0.8870870471000671,
"learning_rate": 8.871593538166538e-06,
"loss": 0.6173,
"step": 3813
},
{
"epoch": 0.24,
"grad_norm": 1.0158964395523071,
"learning_rate": 8.870944212157008e-06,
"loss": 0.6278,
"step": 3814
},
{
"epoch": 0.24,
"grad_norm": 0.8998157382011414,
"learning_rate": 8.870294723155486e-06,
"loss": 0.6385,
"step": 3815
},
{
"epoch": 0.24,
"grad_norm": 0.9535521268844604,
"learning_rate": 8.869645071189316e-06,
"loss": 0.6515,
"step": 3816
},
{
"epoch": 0.24,
"grad_norm": 0.9406755566596985,
"learning_rate": 8.868995256285853e-06,
"loss": 0.6271,
"step": 3817
},
{
"epoch": 0.24,
"grad_norm": 0.920963704586029,
"learning_rate": 8.868345278472458e-06,
"loss": 0.6204,
"step": 3818
},
{
"epoch": 0.24,
"grad_norm": 0.9438266754150391,
"learning_rate": 8.867695137776503e-06,
"loss": 0.6417,
"step": 3819
},
{
"epoch": 0.24,
"grad_norm": 0.9606151580810547,
"learning_rate": 8.86704483422536e-06,
"loss": 0.6408,
"step": 3820
},
{
"epoch": 0.24,
"grad_norm": 0.8948151469230652,
"learning_rate": 8.86639436784641e-06,
"loss": 0.6276,
"step": 3821
},
{
"epoch": 0.24,
"grad_norm": 0.9214081168174744,
"learning_rate": 8.865743738667045e-06,
"loss": 0.5498,
"step": 3822
},
{
"epoch": 0.24,
"grad_norm": 0.8617424964904785,
"learning_rate": 8.865092946714657e-06,
"loss": 0.5677,
"step": 3823
},
{
"epoch": 0.24,
"grad_norm": 0.9291020035743713,
"learning_rate": 8.864441992016653e-06,
"loss": 0.5911,
"step": 3824
},
{
"epoch": 0.24,
"grad_norm": 0.9329352378845215,
"learning_rate": 8.863790874600438e-06,
"loss": 0.6073,
"step": 3825
},
{
"epoch": 0.24,
"grad_norm": 0.8821927309036255,
"learning_rate": 8.863139594493432e-06,
"loss": 0.5866,
"step": 3826
},
{
"epoch": 0.24,
"grad_norm": 0.8997513055801392,
"learning_rate": 8.862488151723055e-06,
"loss": 0.6081,
"step": 3827
},
{
"epoch": 0.24,
"grad_norm": 0.9646042585372925,
"learning_rate": 8.86183654631674e-06,
"loss": 0.6295,
"step": 3828
},
{
"epoch": 0.24,
"grad_norm": 0.9080867171287537,
"learning_rate": 8.861184778301921e-06,
"loss": 0.6282,
"step": 3829
},
{
"epoch": 0.24,
"grad_norm": 0.8966723084449768,
"learning_rate": 8.860532847706046e-06,
"loss": 0.637,
"step": 3830
},
{
"epoch": 0.24,
"grad_norm": 0.9197657704353333,
"learning_rate": 8.85988075455656e-06,
"loss": 0.5963,
"step": 3831
},
{
"epoch": 0.24,
"grad_norm": 0.9177777767181396,
"learning_rate": 8.859228498880923e-06,
"loss": 0.6453,
"step": 3832
},
{
"epoch": 0.24,
"grad_norm": 0.9342770576477051,
"learning_rate": 8.8585760807066e-06,
"loss": 0.6383,
"step": 3833
},
{
"epoch": 0.24,
"grad_norm": 0.9254716038703918,
"learning_rate": 8.85792350006106e-06,
"loss": 0.6608,
"step": 3834
},
{
"epoch": 0.24,
"grad_norm": 0.878808319568634,
"learning_rate": 8.857270756971785e-06,
"loss": 0.6036,
"step": 3835
},
{
"epoch": 0.24,
"grad_norm": 0.9698695540428162,
"learning_rate": 8.856617851466254e-06,
"loss": 0.6553,
"step": 3836
},
{
"epoch": 0.24,
"grad_norm": 0.8826630115509033,
"learning_rate": 8.855964783571963e-06,
"loss": 0.5691,
"step": 3837
},
{
"epoch": 0.24,
"grad_norm": 0.9340159296989441,
"learning_rate": 8.855311553316409e-06,
"loss": 0.5863,
"step": 3838
},
{
"epoch": 0.24,
"grad_norm": 0.8885470628738403,
"learning_rate": 8.854658160727096e-06,
"loss": 0.6368,
"step": 3839
},
{
"epoch": 0.24,
"grad_norm": 0.846393346786499,
"learning_rate": 8.854004605831536e-06,
"loss": 0.5378,
"step": 3840
},
{
"epoch": 0.24,
"grad_norm": 0.8811196684837341,
"learning_rate": 8.853350888657251e-06,
"loss": 0.6132,
"step": 3841
},
{
"epoch": 0.24,
"grad_norm": 0.9290794134140015,
"learning_rate": 8.852697009231766e-06,
"loss": 0.5925,
"step": 3842
},
{
"epoch": 0.24,
"grad_norm": 0.8985415697097778,
"learning_rate": 8.852042967582611e-06,
"loss": 0.6533,
"step": 3843
},
{
"epoch": 0.24,
"grad_norm": 0.8721175789833069,
"learning_rate": 8.851388763737328e-06,
"loss": 0.5439,
"step": 3844
},
{
"epoch": 0.24,
"grad_norm": 0.898200511932373,
"learning_rate": 8.850734397723461e-06,
"loss": 0.5901,
"step": 3845
},
{
"epoch": 0.24,
"grad_norm": 1.0981974601745605,
"learning_rate": 8.850079869568565e-06,
"loss": 0.6579,
"step": 3846
},
{
"epoch": 0.24,
"grad_norm": 0.8868777751922607,
"learning_rate": 8.849425179300197e-06,
"loss": 0.6113,
"step": 3847
},
{
"epoch": 0.24,
"grad_norm": 0.8843356370925903,
"learning_rate": 8.848770326945927e-06,
"loss": 0.5933,
"step": 3848
},
{
"epoch": 0.24,
"grad_norm": 0.9298630356788635,
"learning_rate": 8.84811531253333e-06,
"loss": 0.5776,
"step": 3849
},
{
"epoch": 0.24,
"grad_norm": 1.0039656162261963,
"learning_rate": 8.847460136089982e-06,
"loss": 0.6304,
"step": 3850
},
{
"epoch": 0.24,
"grad_norm": 0.8467380404472351,
"learning_rate": 8.846804797643472e-06,
"loss": 0.6048,
"step": 3851
},
{
"epoch": 0.24,
"grad_norm": 0.8640190958976746,
"learning_rate": 8.846149297221394e-06,
"loss": 0.5943,
"step": 3852
},
{
"epoch": 0.24,
"grad_norm": 0.9181884527206421,
"learning_rate": 8.845493634851348e-06,
"loss": 0.6935,
"step": 3853
},
{
"epoch": 0.24,
"grad_norm": 0.8371793627738953,
"learning_rate": 8.844837810560943e-06,
"loss": 0.5877,
"step": 3854
},
{
"epoch": 0.24,
"grad_norm": 0.9249871969223022,
"learning_rate": 8.844181824377793e-06,
"loss": 0.5707,
"step": 3855
},
{
"epoch": 0.24,
"grad_norm": 0.8807600140571594,
"learning_rate": 8.843525676329521e-06,
"loss": 0.6436,
"step": 3856
},
{
"epoch": 0.24,
"grad_norm": 0.8587551116943359,
"learning_rate": 8.842869366443751e-06,
"loss": 0.5952,
"step": 3857
},
{
"epoch": 0.24,
"grad_norm": 0.8563278317451477,
"learning_rate": 8.842212894748122e-06,
"loss": 0.5835,
"step": 3858
},
{
"epoch": 0.24,
"grad_norm": 0.8369274735450745,
"learning_rate": 8.841556261270272e-06,
"loss": 0.5843,
"step": 3859
},
{
"epoch": 0.24,
"grad_norm": 0.9252521395683289,
"learning_rate": 8.840899466037854e-06,
"loss": 0.6468,
"step": 3860
},
{
"epoch": 0.24,
"grad_norm": 0.874243438243866,
"learning_rate": 8.840242509078521e-06,
"loss": 0.5989,
"step": 3861
},
{
"epoch": 0.24,
"grad_norm": 0.8621048331260681,
"learning_rate": 8.839585390419933e-06,
"loss": 0.5763,
"step": 3862
},
{
"epoch": 0.24,
"grad_norm": 0.8070306777954102,
"learning_rate": 8.838928110089763e-06,
"loss": 0.6054,
"step": 3863
},
{
"epoch": 0.24,
"grad_norm": 0.9515740275382996,
"learning_rate": 8.838270668115685e-06,
"loss": 0.6457,
"step": 3864
},
{
"epoch": 0.24,
"grad_norm": 0.8527739644050598,
"learning_rate": 8.837613064525381e-06,
"loss": 0.5998,
"step": 3865
},
{
"epoch": 0.24,
"grad_norm": 0.9535593390464783,
"learning_rate": 8.83695529934654e-06,
"loss": 0.6252,
"step": 3866
},
{
"epoch": 0.24,
"grad_norm": 0.8122836351394653,
"learning_rate": 8.83629737260686e-06,
"loss": 0.5928,
"step": 3867
},
{
"epoch": 0.25,
"grad_norm": 0.9340097904205322,
"learning_rate": 8.835639284334043e-06,
"loss": 0.5719,
"step": 3868
},
{
"epoch": 0.25,
"grad_norm": 0.9119397401809692,
"learning_rate": 8.834981034555799e-06,
"loss": 0.6028,
"step": 3869
},
{
"epoch": 0.25,
"grad_norm": 0.8478021025657654,
"learning_rate": 8.834322623299844e-06,
"loss": 0.5882,
"step": 3870
},
{
"epoch": 0.25,
"grad_norm": 0.9423801898956299,
"learning_rate": 8.833664050593904e-06,
"loss": 0.5901,
"step": 3871
},
{
"epoch": 0.25,
"grad_norm": 0.973012387752533,
"learning_rate": 8.833005316465706e-06,
"loss": 0.5702,
"step": 3872
},
{
"epoch": 0.25,
"grad_norm": 0.870364785194397,
"learning_rate": 8.832346420942987e-06,
"loss": 0.5943,
"step": 3873
},
{
"epoch": 0.25,
"grad_norm": 0.8896936774253845,
"learning_rate": 8.831687364053493e-06,
"loss": 0.6135,
"step": 3874
},
{
"epoch": 0.25,
"grad_norm": 0.9121167063713074,
"learning_rate": 8.831028145824974e-06,
"loss": 0.639,
"step": 3875
},
{
"epoch": 0.25,
"grad_norm": 0.9295619130134583,
"learning_rate": 8.830368766285186e-06,
"loss": 0.6404,
"step": 3876
},
{
"epoch": 0.25,
"grad_norm": 0.9236605763435364,
"learning_rate": 8.829709225461894e-06,
"loss": 0.596,
"step": 3877
},
{
"epoch": 0.25,
"grad_norm": 1.0370179414749146,
"learning_rate": 8.829049523382871e-06,
"loss": 0.6572,
"step": 3878
},
{
"epoch": 0.25,
"grad_norm": 0.8750087022781372,
"learning_rate": 8.828389660075891e-06,
"loss": 0.6232,
"step": 3879
},
{
"epoch": 0.25,
"grad_norm": 0.8742169141769409,
"learning_rate": 8.82772963556874e-06,
"loss": 0.6312,
"step": 3880
},
{
"epoch": 0.25,
"grad_norm": 0.8765554428100586,
"learning_rate": 8.827069449889211e-06,
"loss": 0.58,
"step": 3881
},
{
"epoch": 0.25,
"grad_norm": 0.9164361357688904,
"learning_rate": 8.8264091030651e-06,
"loss": 0.6194,
"step": 3882
},
{
"epoch": 0.25,
"grad_norm": 0.914909839630127,
"learning_rate": 8.825748595124214e-06,
"loss": 0.6188,
"step": 3883
},
{
"epoch": 0.25,
"grad_norm": 0.88898104429245,
"learning_rate": 8.825087926094363e-06,
"loss": 0.5854,
"step": 3884
},
{
"epoch": 0.25,
"grad_norm": 0.8506219387054443,
"learning_rate": 8.824427096003367e-06,
"loss": 0.5805,
"step": 3885
},
{
"epoch": 0.25,
"grad_norm": 0.9433155655860901,
"learning_rate": 8.823766104879047e-06,
"loss": 0.5827,
"step": 3886
},
{
"epoch": 0.25,
"grad_norm": 0.8702185153961182,
"learning_rate": 8.823104952749242e-06,
"loss": 0.5661,
"step": 3887
},
{
"epoch": 0.25,
"grad_norm": 0.8791462779045105,
"learning_rate": 8.822443639641785e-06,
"loss": 0.5424,
"step": 3888
},
{
"epoch": 0.25,
"grad_norm": 0.8864879012107849,
"learning_rate": 8.821782165584524e-06,
"loss": 0.6041,
"step": 3889
},
{
"epoch": 0.25,
"grad_norm": 0.9141310453414917,
"learning_rate": 8.82112053060531e-06,
"loss": 0.6335,
"step": 3890
},
{
"epoch": 0.25,
"grad_norm": 0.9409934878349304,
"learning_rate": 8.820458734732004e-06,
"loss": 0.6872,
"step": 3891
},
{
"epoch": 0.25,
"grad_norm": 0.9157419204711914,
"learning_rate": 8.819796777992471e-06,
"loss": 0.5836,
"step": 3892
},
{
"epoch": 0.25,
"grad_norm": 0.957832396030426,
"learning_rate": 8.819134660414585e-06,
"loss": 0.6145,
"step": 3893
},
{
"epoch": 0.25,
"grad_norm": 0.9433353543281555,
"learning_rate": 8.818472382026222e-06,
"loss": 0.573,
"step": 3894
},
{
"epoch": 0.25,
"grad_norm": 0.919173002243042,
"learning_rate": 8.817809942855272e-06,
"loss": 0.5815,
"step": 3895
},
{
"epoch": 0.25,
"grad_norm": 0.8651015758514404,
"learning_rate": 8.817147342929626e-06,
"loss": 0.5762,
"step": 3896
},
{
"epoch": 0.25,
"grad_norm": 0.9625697135925293,
"learning_rate": 8.816484582277184e-06,
"loss": 0.6389,
"step": 3897
},
{
"epoch": 0.25,
"grad_norm": 0.8946129083633423,
"learning_rate": 8.815821660925853e-06,
"loss": 0.6084,
"step": 3898
},
{
"epoch": 0.25,
"grad_norm": 0.9177218079566956,
"learning_rate": 8.815158578903548e-06,
"loss": 0.6022,
"step": 3899
},
{
"epoch": 0.25,
"grad_norm": 0.8781201243400574,
"learning_rate": 8.814495336238185e-06,
"loss": 0.6393,
"step": 3900
},
{
"epoch": 0.25,
"grad_norm": 0.9286174774169922,
"learning_rate": 8.813831932957696e-06,
"loss": 0.6149,
"step": 3901
},
{
"epoch": 0.25,
"grad_norm": 0.882340669631958,
"learning_rate": 8.813168369090007e-06,
"loss": 0.5349,
"step": 3902
},
{
"epoch": 0.25,
"grad_norm": 0.8473665118217468,
"learning_rate": 8.812504644663066e-06,
"loss": 0.5991,
"step": 3903
},
{
"epoch": 0.25,
"grad_norm": 1.013710618019104,
"learning_rate": 8.811840759704816e-06,
"loss": 0.6184,
"step": 3904
},
{
"epoch": 0.25,
"grad_norm": 0.8682031631469727,
"learning_rate": 8.811176714243213e-06,
"loss": 0.6179,
"step": 3905
},
{
"epoch": 0.25,
"grad_norm": 0.9201847314834595,
"learning_rate": 8.810512508306216e-06,
"loss": 0.5807,
"step": 3906
},
{
"epoch": 0.25,
"grad_norm": 0.8606781959533691,
"learning_rate": 8.809848141921793e-06,
"loss": 0.5846,
"step": 3907
},
{
"epoch": 0.25,
"grad_norm": 0.9430428743362427,
"learning_rate": 8.809183615117919e-06,
"loss": 0.6372,
"step": 3908
},
{
"epoch": 0.25,
"grad_norm": 0.8924831748008728,
"learning_rate": 8.808518927922574e-06,
"loss": 0.6182,
"step": 3909
},
{
"epoch": 0.25,
"grad_norm": 0.9287380576133728,
"learning_rate": 8.807854080363745e-06,
"loss": 0.6251,
"step": 3910
},
{
"epoch": 0.25,
"grad_norm": 0.9271407723426819,
"learning_rate": 8.807189072469428e-06,
"loss": 0.6197,
"step": 3911
},
{
"epoch": 0.25,
"grad_norm": 0.8575233817100525,
"learning_rate": 8.806523904267623e-06,
"loss": 0.6011,
"step": 3912
},
{
"epoch": 0.25,
"grad_norm": 1.0265129804611206,
"learning_rate": 8.80585857578634e-06,
"loss": 0.6534,
"step": 3913
},
{
"epoch": 0.25,
"grad_norm": 0.8787725567817688,
"learning_rate": 8.80519308705359e-06,
"loss": 0.5598,
"step": 3914
},
{
"epoch": 0.25,
"grad_norm": 0.9931519031524658,
"learning_rate": 8.804527438097396e-06,
"loss": 0.624,
"step": 3915
},
{
"epoch": 0.25,
"grad_norm": 0.9081161022186279,
"learning_rate": 8.803861628945787e-06,
"loss": 0.5939,
"step": 3916
},
{
"epoch": 0.25,
"grad_norm": 0.9506007432937622,
"learning_rate": 8.803195659626798e-06,
"loss": 0.6593,
"step": 3917
},
{
"epoch": 0.25,
"grad_norm": 0.8824777603149414,
"learning_rate": 8.802529530168469e-06,
"loss": 0.6381,
"step": 3918
},
{
"epoch": 0.25,
"grad_norm": 0.8718113899230957,
"learning_rate": 8.801863240598851e-06,
"loss": 0.6002,
"step": 3919
},
{
"epoch": 0.25,
"grad_norm": 0.880943238735199,
"learning_rate": 8.801196790945999e-06,
"loss": 0.587,
"step": 3920
},
{
"epoch": 0.25,
"grad_norm": 0.9570931196212769,
"learning_rate": 8.800530181237971e-06,
"loss": 0.6615,
"step": 3921
},
{
"epoch": 0.25,
"grad_norm": 0.9796764254570007,
"learning_rate": 8.799863411502838e-06,
"loss": 0.5868,
"step": 3922
},
{
"epoch": 0.25,
"grad_norm": 0.8545233607292175,
"learning_rate": 8.799196481768677e-06,
"loss": 0.6008,
"step": 3923
},
{
"epoch": 0.25,
"grad_norm": 0.8299331068992615,
"learning_rate": 8.798529392063569e-06,
"loss": 0.5954,
"step": 3924
},
{
"epoch": 0.25,
"grad_norm": 0.8435283899307251,
"learning_rate": 8.7978621424156e-06,
"loss": 0.5738,
"step": 3925
},
{
"epoch": 0.25,
"grad_norm": 0.9209175109863281,
"learning_rate": 8.79719473285287e-06,
"loss": 0.64,
"step": 3926
},
{
"epoch": 0.25,
"grad_norm": 0.9299062490463257,
"learning_rate": 8.796527163403479e-06,
"loss": 0.6396,
"step": 3927
},
{
"epoch": 0.25,
"grad_norm": 0.9327616691589355,
"learning_rate": 8.795859434095535e-06,
"loss": 0.6079,
"step": 3928
},
{
"epoch": 0.25,
"grad_norm": 0.9269071221351624,
"learning_rate": 8.795191544957156e-06,
"loss": 0.6834,
"step": 3929
},
{
"epoch": 0.25,
"grad_norm": 0.9255284667015076,
"learning_rate": 8.794523496016465e-06,
"loss": 0.6848,
"step": 3930
},
{
"epoch": 0.25,
"grad_norm": 0.8709956407546997,
"learning_rate": 8.793855287301588e-06,
"loss": 0.6473,
"step": 3931
},
{
"epoch": 0.25,
"grad_norm": 0.840023934841156,
"learning_rate": 8.793186918840661e-06,
"loss": 0.5463,
"step": 3932
},
{
"epoch": 0.25,
"grad_norm": 0.9407967925071716,
"learning_rate": 8.792518390661831e-06,
"loss": 0.6065,
"step": 3933
},
{
"epoch": 0.25,
"grad_norm": 0.8985733985900879,
"learning_rate": 8.791849702793245e-06,
"loss": 0.6236,
"step": 3934
},
{
"epoch": 0.25,
"grad_norm": 0.8958050012588501,
"learning_rate": 8.791180855263057e-06,
"loss": 0.6322,
"step": 3935
},
{
"epoch": 0.25,
"grad_norm": 0.9496500492095947,
"learning_rate": 8.790511848099433e-06,
"loss": 0.6399,
"step": 3936
},
{
"epoch": 0.25,
"grad_norm": 0.9206477999687195,
"learning_rate": 8.789842681330543e-06,
"loss": 0.6232,
"step": 3937
},
{
"epoch": 0.25,
"grad_norm": 0.9060776829719543,
"learning_rate": 8.789173354984557e-06,
"loss": 0.5971,
"step": 3938
},
{
"epoch": 0.25,
"grad_norm": 0.9292250871658325,
"learning_rate": 8.788503869089667e-06,
"loss": 0.6796,
"step": 3939
},
{
"epoch": 0.25,
"grad_norm": 0.8954676985740662,
"learning_rate": 8.787834223674056e-06,
"loss": 0.6061,
"step": 3940
},
{
"epoch": 0.25,
"grad_norm": 0.8948878049850464,
"learning_rate": 8.787164418765923e-06,
"loss": 0.5726,
"step": 3941
},
{
"epoch": 0.25,
"grad_norm": 0.9730081558227539,
"learning_rate": 8.786494454393472e-06,
"loss": 0.6271,
"step": 3942
},
{
"epoch": 0.25,
"grad_norm": 0.9321277737617493,
"learning_rate": 8.785824330584912e-06,
"loss": 0.5956,
"step": 3943
},
{
"epoch": 0.25,
"grad_norm": 0.9635143280029297,
"learning_rate": 8.785154047368459e-06,
"loss": 0.6239,
"step": 3944
},
{
"epoch": 0.25,
"grad_norm": 0.9317022562026978,
"learning_rate": 8.784483604772336e-06,
"loss": 0.6193,
"step": 3945
},
{
"epoch": 0.25,
"grad_norm": 0.893115222454071,
"learning_rate": 8.783813002824773e-06,
"loss": 0.617,
"step": 3946
},
{
"epoch": 0.25,
"grad_norm": 0.899761974811554,
"learning_rate": 8.783142241554009e-06,
"loss": 0.5963,
"step": 3947
},
{
"epoch": 0.25,
"grad_norm": 0.8902785181999207,
"learning_rate": 8.782471320988284e-06,
"loss": 0.6318,
"step": 3948
},
{
"epoch": 0.25,
"grad_norm": 0.8925158381462097,
"learning_rate": 8.781800241155851e-06,
"loss": 0.5684,
"step": 3949
},
{
"epoch": 0.25,
"grad_norm": 0.9196040630340576,
"learning_rate": 8.781129002084965e-06,
"loss": 0.5899,
"step": 3950
},
{
"epoch": 0.25,
"grad_norm": 0.9138063192367554,
"learning_rate": 8.780457603803892e-06,
"loss": 0.6088,
"step": 3951
},
{
"epoch": 0.25,
"grad_norm": 0.8876779675483704,
"learning_rate": 8.779786046340898e-06,
"loss": 0.6453,
"step": 3952
},
{
"epoch": 0.25,
"grad_norm": 0.9258411526679993,
"learning_rate": 8.779114329724265e-06,
"loss": 0.6308,
"step": 3953
},
{
"epoch": 0.25,
"grad_norm": 0.8825391530990601,
"learning_rate": 8.778442453982272e-06,
"loss": 0.5773,
"step": 3954
},
{
"epoch": 0.25,
"grad_norm": 0.8886011242866516,
"learning_rate": 8.777770419143214e-06,
"loss": 0.646,
"step": 3955
},
{
"epoch": 0.25,
"grad_norm": 0.9516189694404602,
"learning_rate": 8.777098225235384e-06,
"loss": 0.6543,
"step": 3956
},
{
"epoch": 0.25,
"grad_norm": 0.9398981928825378,
"learning_rate": 8.776425872287087e-06,
"loss": 0.6067,
"step": 3957
},
{
"epoch": 0.25,
"grad_norm": 0.9179983139038086,
"learning_rate": 8.775753360326635e-06,
"loss": 0.6425,
"step": 3958
},
{
"epoch": 0.25,
"grad_norm": 0.8767003417015076,
"learning_rate": 8.775080689382342e-06,
"loss": 0.6333,
"step": 3959
},
{
"epoch": 0.25,
"grad_norm": 0.8714125752449036,
"learning_rate": 8.774407859482537e-06,
"loss": 0.6302,
"step": 3960
},
{
"epoch": 0.25,
"grad_norm": 0.8385068774223328,
"learning_rate": 8.773734870655544e-06,
"loss": 0.6087,
"step": 3961
},
{
"epoch": 0.25,
"grad_norm": 0.9136397242546082,
"learning_rate": 8.773061722929704e-06,
"loss": 0.6035,
"step": 3962
},
{
"epoch": 0.25,
"grad_norm": 0.805779755115509,
"learning_rate": 8.772388416333361e-06,
"loss": 0.5575,
"step": 3963
},
{
"epoch": 0.25,
"grad_norm": 0.9417558908462524,
"learning_rate": 8.771714950894865e-06,
"loss": 0.5909,
"step": 3964
},
{
"epoch": 0.25,
"grad_norm": 0.922148585319519,
"learning_rate": 8.771041326642572e-06,
"loss": 0.6075,
"step": 3965
},
{
"epoch": 0.25,
"grad_norm": 0.9802806377410889,
"learning_rate": 8.770367543604849e-06,
"loss": 0.6446,
"step": 3966
},
{
"epoch": 0.25,
"grad_norm": 0.924773633480072,
"learning_rate": 8.769693601810066e-06,
"loss": 0.6432,
"step": 3967
},
{
"epoch": 0.25,
"grad_norm": 0.8747174143791199,
"learning_rate": 8.769019501286598e-06,
"loss": 0.5868,
"step": 3968
},
{
"epoch": 0.25,
"grad_norm": 0.9166977405548096,
"learning_rate": 8.768345242062828e-06,
"loss": 0.6624,
"step": 3969
},
{
"epoch": 0.25,
"grad_norm": 0.886821985244751,
"learning_rate": 8.767670824167151e-06,
"loss": 0.6774,
"step": 3970
},
{
"epoch": 0.25,
"grad_norm": 0.8805607557296753,
"learning_rate": 8.766996247627963e-06,
"loss": 0.6026,
"step": 3971
},
{
"epoch": 0.25,
"grad_norm": 0.8670737743377686,
"learning_rate": 8.766321512473666e-06,
"loss": 0.6216,
"step": 3972
},
{
"epoch": 0.25,
"grad_norm": 0.9067496657371521,
"learning_rate": 8.765646618732672e-06,
"loss": 0.6065,
"step": 3973
},
{
"epoch": 0.25,
"grad_norm": 0.8839542269706726,
"learning_rate": 8.7649715664334e-06,
"loss": 0.6026,
"step": 3974
},
{
"epoch": 0.25,
"grad_norm": 0.915699303150177,
"learning_rate": 8.764296355604273e-06,
"loss": 0.6433,
"step": 3975
},
{
"epoch": 0.25,
"grad_norm": 0.8603993654251099,
"learning_rate": 8.76362098627372e-06,
"loss": 0.6031,
"step": 3976
},
{
"epoch": 0.25,
"grad_norm": 0.9431526064872742,
"learning_rate": 8.76294545847018e-06,
"loss": 0.6777,
"step": 3977
},
{
"epoch": 0.25,
"grad_norm": 0.919879138469696,
"learning_rate": 8.762269772222099e-06,
"loss": 0.5918,
"step": 3978
},
{
"epoch": 0.25,
"grad_norm": 0.8991773128509521,
"learning_rate": 8.761593927557923e-06,
"loss": 0.6318,
"step": 3979
},
{
"epoch": 0.25,
"grad_norm": 0.8900842666625977,
"learning_rate": 8.760917924506114e-06,
"loss": 0.578,
"step": 3980
},
{
"epoch": 0.25,
"grad_norm": 1.0085675716400146,
"learning_rate": 8.760241763095135e-06,
"loss": 0.6554,
"step": 3981
},
{
"epoch": 0.25,
"grad_norm": 0.9195557832717896,
"learning_rate": 8.759565443353454e-06,
"loss": 0.6484,
"step": 3982
},
{
"epoch": 0.25,
"grad_norm": 0.9673278331756592,
"learning_rate": 8.758888965309554e-06,
"loss": 0.6418,
"step": 3983
},
{
"epoch": 0.25,
"grad_norm": 0.9138756394386292,
"learning_rate": 8.758212328991913e-06,
"loss": 0.6098,
"step": 3984
},
{
"epoch": 0.25,
"grad_norm": 0.9599946737289429,
"learning_rate": 8.757535534429027e-06,
"loss": 0.6413,
"step": 3985
},
{
"epoch": 0.25,
"grad_norm": 0.9634223580360413,
"learning_rate": 8.756858581649391e-06,
"loss": 0.6299,
"step": 3986
},
{
"epoch": 0.25,
"grad_norm": 0.8933982253074646,
"learning_rate": 8.756181470681507e-06,
"loss": 0.642,
"step": 3987
},
{
"epoch": 0.25,
"grad_norm": 0.8917509317398071,
"learning_rate": 8.755504201553889e-06,
"loss": 0.6301,
"step": 3988
},
{
"epoch": 0.25,
"grad_norm": 0.9482274651527405,
"learning_rate": 8.754826774295056e-06,
"loss": 0.6465,
"step": 3989
},
{
"epoch": 0.25,
"grad_norm": 0.9077238440513611,
"learning_rate": 8.754149188933527e-06,
"loss": 0.5909,
"step": 3990
},
{
"epoch": 0.25,
"grad_norm": 0.9035444855690002,
"learning_rate": 8.753471445497837e-06,
"loss": 0.5492,
"step": 3991
},
{
"epoch": 0.25,
"grad_norm": 0.8981434106826782,
"learning_rate": 8.752793544016519e-06,
"loss": 0.6003,
"step": 3992
},
{
"epoch": 0.25,
"grad_norm": 0.9048976898193359,
"learning_rate": 8.752115484518123e-06,
"loss": 0.656,
"step": 3993
},
{
"epoch": 0.25,
"grad_norm": 0.9182979464530945,
"learning_rate": 8.751437267031194e-06,
"loss": 0.6331,
"step": 3994
},
{
"epoch": 0.25,
"grad_norm": 0.9162821173667908,
"learning_rate": 8.750758891584293e-06,
"loss": 0.6385,
"step": 3995
},
{
"epoch": 0.25,
"grad_norm": 0.882770836353302,
"learning_rate": 8.750080358205983e-06,
"loss": 0.5651,
"step": 3996
},
{
"epoch": 0.25,
"grad_norm": 0.8625838756561279,
"learning_rate": 8.749401666924834e-06,
"loss": 0.5548,
"step": 3997
},
{
"epoch": 0.25,
"grad_norm": 0.8796778917312622,
"learning_rate": 8.748722817769426e-06,
"loss": 0.6218,
"step": 3998
},
{
"epoch": 0.25,
"grad_norm": 0.8950878977775574,
"learning_rate": 8.74804381076834e-06,
"loss": 0.591,
"step": 3999
},
{
"epoch": 0.25,
"grad_norm": 0.9669718742370605,
"learning_rate": 8.747364645950168e-06,
"loss": 0.6653,
"step": 4000
},
{
"epoch": 0.25,
"grad_norm": 0.8975842595100403,
"learning_rate": 8.746685323343507e-06,
"loss": 0.6355,
"step": 4001
},
{
"epoch": 0.25,
"grad_norm": 0.8494351506233215,
"learning_rate": 8.74600584297696e-06,
"loss": 0.5891,
"step": 4002
},
{
"epoch": 0.25,
"grad_norm": 0.8773183226585388,
"learning_rate": 8.745326204879139e-06,
"loss": 0.6023,
"step": 4003
},
{
"epoch": 0.25,
"grad_norm": 0.8296153545379639,
"learning_rate": 8.74464640907866e-06,
"loss": 0.5928,
"step": 4004
},
{
"epoch": 0.25,
"grad_norm": 0.989004909992218,
"learning_rate": 8.743966455604147e-06,
"loss": 0.6234,
"step": 4005
},
{
"epoch": 0.25,
"grad_norm": 0.8475044369697571,
"learning_rate": 8.743286344484232e-06,
"loss": 0.5406,
"step": 4006
},
{
"epoch": 0.25,
"grad_norm": 0.8195810317993164,
"learning_rate": 8.74260607574755e-06,
"loss": 0.5639,
"step": 4007
},
{
"epoch": 0.25,
"grad_norm": 0.909238874912262,
"learning_rate": 8.741925649422746e-06,
"loss": 0.5854,
"step": 4008
},
{
"epoch": 0.25,
"grad_norm": 0.9121100902557373,
"learning_rate": 8.741245065538471e-06,
"loss": 0.614,
"step": 4009
},
{
"epoch": 0.25,
"grad_norm": 0.8657447695732117,
"learning_rate": 8.74056432412338e-06,
"loss": 0.568,
"step": 4010
},
{
"epoch": 0.25,
"grad_norm": 0.808589518070221,
"learning_rate": 8.739883425206138e-06,
"loss": 0.598,
"step": 4011
},
{
"epoch": 0.25,
"grad_norm": 0.9169565439224243,
"learning_rate": 8.739202368815416e-06,
"loss": 0.6287,
"step": 4012
},
{
"epoch": 0.25,
"grad_norm": 0.9050797820091248,
"learning_rate": 8.738521154979889e-06,
"loss": 0.6202,
"step": 4013
},
{
"epoch": 0.25,
"grad_norm": 0.9150273203849792,
"learning_rate": 8.737839783728242e-06,
"loss": 0.6038,
"step": 4014
},
{
"epoch": 0.25,
"grad_norm": 0.9774922132492065,
"learning_rate": 8.737158255089164e-06,
"loss": 0.6055,
"step": 4015
},
{
"epoch": 0.25,
"grad_norm": 0.855354905128479,
"learning_rate": 8.736476569091352e-06,
"loss": 0.5831,
"step": 4016
},
{
"epoch": 0.25,
"grad_norm": 0.8585079312324524,
"learning_rate": 8.735794725763512e-06,
"loss": 0.6254,
"step": 4017
},
{
"epoch": 0.25,
"grad_norm": 0.9431387782096863,
"learning_rate": 8.735112725134352e-06,
"loss": 0.5971,
"step": 4018
},
{
"epoch": 0.25,
"grad_norm": 0.9413880109786987,
"learning_rate": 8.734430567232585e-06,
"loss": 0.6035,
"step": 4019
},
{
"epoch": 0.25,
"grad_norm": 0.8787413835525513,
"learning_rate": 8.733748252086943e-06,
"loss": 0.6233,
"step": 4020
},
{
"epoch": 0.25,
"grad_norm": 0.9067035913467407,
"learning_rate": 8.733065779726146e-06,
"loss": 0.6869,
"step": 4021
},
{
"epoch": 0.25,
"grad_norm": 0.8852519392967224,
"learning_rate": 8.732383150178938e-06,
"loss": 0.6373,
"step": 4022
},
{
"epoch": 0.25,
"grad_norm": 0.9651377201080322,
"learning_rate": 8.73170036347406e-06,
"loss": 0.6788,
"step": 4023
},
{
"epoch": 0.25,
"grad_norm": 0.8923559784889221,
"learning_rate": 8.731017419640261e-06,
"loss": 0.6376,
"step": 4024
},
{
"epoch": 0.26,
"grad_norm": 0.9307226538658142,
"learning_rate": 8.730334318706297e-06,
"loss": 0.5993,
"step": 4025
},
{
"epoch": 0.26,
"grad_norm": 0.9229474067687988,
"learning_rate": 8.729651060700932e-06,
"loss": 0.6617,
"step": 4026
},
{
"epoch": 0.26,
"grad_norm": 0.863122284412384,
"learning_rate": 8.728967645652936e-06,
"loss": 0.5719,
"step": 4027
},
{
"epoch": 0.26,
"grad_norm": 0.9152368903160095,
"learning_rate": 8.728284073591083e-06,
"loss": 0.6337,
"step": 4028
},
{
"epoch": 0.26,
"grad_norm": 0.922824501991272,
"learning_rate": 8.727600344544159e-06,
"loss": 0.6418,
"step": 4029
},
{
"epoch": 0.26,
"grad_norm": 0.8921812176704407,
"learning_rate": 8.72691645854095e-06,
"loss": 0.625,
"step": 4030
},
{
"epoch": 0.26,
"grad_norm": 1.0182279348373413,
"learning_rate": 8.726232415610257e-06,
"loss": 0.6637,
"step": 4031
},
{
"epoch": 0.26,
"grad_norm": 0.8648515343666077,
"learning_rate": 8.725548215780877e-06,
"loss": 0.5921,
"step": 4032
},
{
"epoch": 0.26,
"grad_norm": 0.8446393609046936,
"learning_rate": 8.724863859081622e-06,
"loss": 0.6198,
"step": 4033
},
{
"epoch": 0.26,
"grad_norm": 0.8602930307388306,
"learning_rate": 8.724179345541308e-06,
"loss": 0.5806,
"step": 4034
},
{
"epoch": 0.26,
"grad_norm": 0.896755576133728,
"learning_rate": 8.72349467518876e-06,
"loss": 0.6625,
"step": 4035
},
{
"epoch": 0.26,
"grad_norm": 0.8296254277229309,
"learning_rate": 8.7228098480528e-06,
"loss": 0.6138,
"step": 4036
},
{
"epoch": 0.26,
"grad_norm": 0.9342056512832642,
"learning_rate": 8.72212486416227e-06,
"loss": 0.6454,
"step": 4037
},
{
"epoch": 0.26,
"grad_norm": 0.9745578169822693,
"learning_rate": 8.721439723546012e-06,
"loss": 0.6671,
"step": 4038
},
{
"epoch": 0.26,
"grad_norm": 0.9331424832344055,
"learning_rate": 8.720754426232871e-06,
"loss": 0.5942,
"step": 4039
},
{
"epoch": 0.26,
"grad_norm": 0.9035102128982544,
"learning_rate": 8.720068972251705e-06,
"loss": 0.6128,
"step": 4040
},
{
"epoch": 0.26,
"grad_norm": 0.8807538747787476,
"learning_rate": 8.719383361631376e-06,
"loss": 0.6637,
"step": 4041
},
{
"epoch": 0.26,
"grad_norm": 0.8650099635124207,
"learning_rate": 8.718697594400753e-06,
"loss": 0.6391,
"step": 4042
},
{
"epoch": 0.26,
"grad_norm": 0.8635523915290833,
"learning_rate": 8.71801167058871e-06,
"loss": 0.6161,
"step": 4043
},
{
"epoch": 0.26,
"grad_norm": 0.8900404572486877,
"learning_rate": 8.717325590224129e-06,
"loss": 0.6197,
"step": 4044
},
{
"epoch": 0.26,
"grad_norm": 0.9338827133178711,
"learning_rate": 8.7166393533359e-06,
"loss": 0.6293,
"step": 4045
},
{
"epoch": 0.26,
"grad_norm": 0.9154714941978455,
"learning_rate": 8.715952959952917e-06,
"loss": 0.6291,
"step": 4046
},
{
"epoch": 0.26,
"grad_norm": 0.8892160058021545,
"learning_rate": 8.715266410104081e-06,
"loss": 0.6401,
"step": 4047
},
{
"epoch": 0.26,
"grad_norm": 0.8630048036575317,
"learning_rate": 8.714579703818301e-06,
"loss": 0.6683,
"step": 4048
},
{
"epoch": 0.26,
"grad_norm": 0.8822508454322815,
"learning_rate": 8.713892841124492e-06,
"loss": 0.6048,
"step": 4049
},
{
"epoch": 0.26,
"grad_norm": 0.9436633586883545,
"learning_rate": 8.713205822051576e-06,
"loss": 0.6598,
"step": 4050
},
{
"epoch": 0.26,
"grad_norm": 0.8699237704277039,
"learning_rate": 8.71251864662848e-06,
"loss": 0.607,
"step": 4051
},
{
"epoch": 0.26,
"grad_norm": 0.9008539915084839,
"learning_rate": 8.711831314884137e-06,
"loss": 0.6121,
"step": 4052
},
{
"epoch": 0.26,
"grad_norm": 0.8727585077285767,
"learning_rate": 8.711143826847491e-06,
"loss": 0.6199,
"step": 4053
},
{
"epoch": 0.26,
"grad_norm": 0.8655484914779663,
"learning_rate": 8.71045618254749e-06,
"loss": 0.58,
"step": 4054
},
{
"epoch": 0.26,
"grad_norm": 0.9389612078666687,
"learning_rate": 8.709768382013084e-06,
"loss": 0.591,
"step": 4055
},
{
"epoch": 0.26,
"grad_norm": 0.9785065650939941,
"learning_rate": 8.709080425273238e-06,
"loss": 0.6502,
"step": 4056
},
{
"epoch": 0.26,
"grad_norm": 0.9361227750778198,
"learning_rate": 8.708392312356919e-06,
"loss": 0.6516,
"step": 4057
},
{
"epoch": 0.26,
"grad_norm": 0.9632472395896912,
"learning_rate": 8.7077040432931e-06,
"loss": 0.7088,
"step": 4058
},
{
"epoch": 0.26,
"grad_norm": 0.9708462953567505,
"learning_rate": 8.707015618110761e-06,
"loss": 0.6293,
"step": 4059
},
{
"epoch": 0.26,
"grad_norm": 0.8739571571350098,
"learning_rate": 8.706327036838891e-06,
"loss": 0.6009,
"step": 4060
},
{
"epoch": 0.26,
"grad_norm": 0.8531939387321472,
"learning_rate": 8.705638299506482e-06,
"loss": 0.5739,
"step": 4061
},
{
"epoch": 0.26,
"grad_norm": 1.007936716079712,
"learning_rate": 8.704949406142536e-06,
"loss": 0.6615,
"step": 4062
},
{
"epoch": 0.26,
"grad_norm": 0.9138129949569702,
"learning_rate": 8.70426035677606e-06,
"loss": 0.6759,
"step": 4063
},
{
"epoch": 0.26,
"grad_norm": 0.8993487358093262,
"learning_rate": 8.703571151436064e-06,
"loss": 0.5449,
"step": 4064
},
{
"epoch": 0.26,
"grad_norm": 0.9341960549354553,
"learning_rate": 8.702881790151572e-06,
"loss": 0.5835,
"step": 4065
},
{
"epoch": 0.26,
"grad_norm": 0.9405590891838074,
"learning_rate": 8.70219227295161e-06,
"loss": 0.7077,
"step": 4066
},
{
"epoch": 0.26,
"grad_norm": 0.8309204578399658,
"learning_rate": 8.70150259986521e-06,
"loss": 0.5906,
"step": 4067
},
{
"epoch": 0.26,
"grad_norm": 0.9234378337860107,
"learning_rate": 8.70081277092141e-06,
"loss": 0.6099,
"step": 4068
},
{
"epoch": 0.26,
"grad_norm": 0.9216628670692444,
"learning_rate": 8.700122786149261e-06,
"loss": 0.6042,
"step": 4069
},
{
"epoch": 0.26,
"grad_norm": 1.0060338973999023,
"learning_rate": 8.699432645577812e-06,
"loss": 0.7019,
"step": 4070
},
{
"epoch": 0.26,
"grad_norm": 0.8804594278335571,
"learning_rate": 8.698742349236124e-06,
"loss": 0.5648,
"step": 4071
},
{
"epoch": 0.26,
"grad_norm": 0.9049243927001953,
"learning_rate": 8.698051897153264e-06,
"loss": 0.6575,
"step": 4072
},
{
"epoch": 0.26,
"grad_norm": 0.8847574591636658,
"learning_rate": 8.697361289358302e-06,
"loss": 0.5955,
"step": 4073
},
{
"epoch": 0.26,
"grad_norm": 0.943061351776123,
"learning_rate": 8.696670525880318e-06,
"loss": 0.6276,
"step": 4074
},
{
"epoch": 0.26,
"grad_norm": 0.9238365292549133,
"learning_rate": 8.695979606748398e-06,
"loss": 0.5952,
"step": 4075
},
{
"epoch": 0.26,
"grad_norm": 1.0495893955230713,
"learning_rate": 8.695288531991633e-06,
"loss": 0.6174,
"step": 4076
},
{
"epoch": 0.26,
"grad_norm": 0.8936722278594971,
"learning_rate": 8.694597301639125e-06,
"loss": 0.6327,
"step": 4077
},
{
"epoch": 0.26,
"grad_norm": 0.9051495790481567,
"learning_rate": 8.693905915719976e-06,
"loss": 0.6268,
"step": 4078
},
{
"epoch": 0.26,
"grad_norm": 0.883945882320404,
"learning_rate": 8.693214374263298e-06,
"loss": 0.6083,
"step": 4079
},
{
"epoch": 0.26,
"grad_norm": 0.8976554274559021,
"learning_rate": 8.692522677298213e-06,
"loss": 0.6267,
"step": 4080
},
{
"epoch": 0.26,
"grad_norm": 0.8834015727043152,
"learning_rate": 8.691830824853843e-06,
"loss": 0.6268,
"step": 4081
},
{
"epoch": 0.26,
"grad_norm": 0.8857308030128479,
"learning_rate": 8.691138816959318e-06,
"loss": 0.6356,
"step": 4082
},
{
"epoch": 0.26,
"grad_norm": 0.9096359014511108,
"learning_rate": 8.690446653643778e-06,
"loss": 0.6125,
"step": 4083
},
{
"epoch": 0.26,
"grad_norm": 0.9551771879196167,
"learning_rate": 8.68975433493637e-06,
"loss": 0.627,
"step": 4084
},
{
"epoch": 0.26,
"grad_norm": 0.9362192749977112,
"learning_rate": 8.689061860866242e-06,
"loss": 0.5975,
"step": 4085
},
{
"epoch": 0.26,
"grad_norm": 0.9172837734222412,
"learning_rate": 8.68836923146255e-06,
"loss": 0.6706,
"step": 4086
},
{
"epoch": 0.26,
"grad_norm": 0.9365245699882507,
"learning_rate": 8.687676446754464e-06,
"loss": 0.6429,
"step": 4087
},
{
"epoch": 0.26,
"grad_norm": 0.9222214221954346,
"learning_rate": 8.686983506771149e-06,
"loss": 0.6116,
"step": 4088
},
{
"epoch": 0.26,
"grad_norm": 0.8810616731643677,
"learning_rate": 8.686290411541785e-06,
"loss": 0.5765,
"step": 4089
},
{
"epoch": 0.26,
"grad_norm": 0.8715612888336182,
"learning_rate": 8.685597161095555e-06,
"loss": 0.5724,
"step": 4090
},
{
"epoch": 0.26,
"grad_norm": 0.8744463920593262,
"learning_rate": 8.68490375546165e-06,
"loss": 0.5963,
"step": 4091
},
{
"epoch": 0.26,
"grad_norm": 0.936255931854248,
"learning_rate": 8.684210194669269e-06,
"loss": 0.6308,
"step": 4092
},
{
"epoch": 0.26,
"grad_norm": 0.9600224494934082,
"learning_rate": 8.68351647874761e-06,
"loss": 0.6213,
"step": 4093
},
{
"epoch": 0.26,
"grad_norm": 0.9066085815429688,
"learning_rate": 8.682822607725887e-06,
"loss": 0.6384,
"step": 4094
},
{
"epoch": 0.26,
"grad_norm": 0.9050360918045044,
"learning_rate": 8.682128581633316e-06,
"loss": 0.6109,
"step": 4095
},
{
"epoch": 0.26,
"grad_norm": 0.8644648194313049,
"learning_rate": 8.68143440049912e-06,
"loss": 0.6181,
"step": 4096
},
{
"epoch": 0.26,
"grad_norm": 0.8626109957695007,
"learning_rate": 8.68074006435253e-06,
"loss": 0.6159,
"step": 4097
},
{
"epoch": 0.26,
"grad_norm": 0.9877548217773438,
"learning_rate": 8.680045573222776e-06,
"loss": 0.6638,
"step": 4098
},
{
"epoch": 0.26,
"grad_norm": 0.9884246587753296,
"learning_rate": 8.679350927139108e-06,
"loss": 0.6608,
"step": 4099
},
{
"epoch": 0.26,
"grad_norm": 0.8285159468650818,
"learning_rate": 8.678656126130768e-06,
"loss": 0.6032,
"step": 4100
},
{
"epoch": 0.26,
"grad_norm": 0.9355902671813965,
"learning_rate": 8.677961170227021e-06,
"loss": 0.6451,
"step": 4101
},
{
"epoch": 0.26,
"grad_norm": 0.8407034873962402,
"learning_rate": 8.677266059457121e-06,
"loss": 0.6093,
"step": 4102
},
{
"epoch": 0.26,
"grad_norm": 0.8834094405174255,
"learning_rate": 8.67657079385034e-06,
"loss": 0.6294,
"step": 4103
},
{
"epoch": 0.26,
"grad_norm": 0.953618049621582,
"learning_rate": 8.675875373435951e-06,
"loss": 0.5979,
"step": 4104
},
{
"epoch": 0.26,
"grad_norm": 0.9017611742019653,
"learning_rate": 8.67517979824324e-06,
"loss": 0.5706,
"step": 4105
},
{
"epoch": 0.26,
"grad_norm": 0.9118735194206238,
"learning_rate": 8.674484068301492e-06,
"loss": 0.6226,
"step": 4106
},
{
"epoch": 0.26,
"grad_norm": 0.8959848284721375,
"learning_rate": 8.673788183640001e-06,
"loss": 0.6742,
"step": 4107
},
{
"epoch": 0.26,
"grad_norm": 0.9677282571792603,
"learning_rate": 8.673092144288071e-06,
"loss": 0.6542,
"step": 4108
},
{
"epoch": 0.26,
"grad_norm": 0.8986738324165344,
"learning_rate": 8.672395950275008e-06,
"loss": 0.6457,
"step": 4109
},
{
"epoch": 0.26,
"grad_norm": 0.9157966375350952,
"learning_rate": 8.671699601630127e-06,
"loss": 0.6178,
"step": 4110
},
{
"epoch": 0.26,
"grad_norm": 0.8645839095115662,
"learning_rate": 8.67100309838275e-06,
"loss": 0.5971,
"step": 4111
},
{
"epoch": 0.26,
"grad_norm": 0.9088585376739502,
"learning_rate": 8.670306440562202e-06,
"loss": 0.635,
"step": 4112
},
{
"epoch": 0.26,
"grad_norm": 0.8277181386947632,
"learning_rate": 8.669609628197817e-06,
"loss": 0.5686,
"step": 4113
},
{
"epoch": 0.26,
"grad_norm": 0.9371722340583801,
"learning_rate": 8.668912661318938e-06,
"loss": 0.6229,
"step": 4114
},
{
"epoch": 0.26,
"grad_norm": 0.8745486736297607,
"learning_rate": 8.66821553995491e-06,
"loss": 0.6389,
"step": 4115
},
{
"epoch": 0.26,
"grad_norm": 0.8587163686752319,
"learning_rate": 8.667518264135085e-06,
"loss": 0.5837,
"step": 4116
},
{
"epoch": 0.26,
"grad_norm": 0.895158588886261,
"learning_rate": 8.666820833888825e-06,
"loss": 0.5817,
"step": 4117
},
{
"epoch": 0.26,
"grad_norm": 0.9290642738342285,
"learning_rate": 8.666123249245495e-06,
"loss": 0.6401,
"step": 4118
},
{
"epoch": 0.26,
"grad_norm": 0.9100977778434753,
"learning_rate": 8.665425510234469e-06,
"loss": 0.6622,
"step": 4119
},
{
"epoch": 0.26,
"grad_norm": 0.8745128512382507,
"learning_rate": 8.664727616885126e-06,
"loss": 0.613,
"step": 4120
},
{
"epoch": 0.26,
"grad_norm": 0.8843961954116821,
"learning_rate": 8.66402956922685e-06,
"loss": 0.6118,
"step": 4121
},
{
"epoch": 0.26,
"grad_norm": 0.9334408044815063,
"learning_rate": 8.663331367289038e-06,
"loss": 0.6604,
"step": 4122
},
{
"epoch": 0.26,
"grad_norm": 0.8388084769248962,
"learning_rate": 8.662633011101084e-06,
"loss": 0.5691,
"step": 4123
},
{
"epoch": 0.26,
"grad_norm": 0.8637480139732361,
"learning_rate": 8.661934500692395e-06,
"loss": 0.6299,
"step": 4124
},
{
"epoch": 0.26,
"grad_norm": 0.8830687403678894,
"learning_rate": 8.661235836092385e-06,
"loss": 0.6879,
"step": 4125
},
{
"epoch": 0.26,
"grad_norm": 0.91837477684021,
"learning_rate": 8.660537017330468e-06,
"loss": 0.6245,
"step": 4126
},
{
"epoch": 0.26,
"grad_norm": 0.9373289346694946,
"learning_rate": 8.659838044436074e-06,
"loss": 0.6387,
"step": 4127
},
{
"epoch": 0.26,
"grad_norm": 0.8295657634735107,
"learning_rate": 8.65913891743863e-06,
"loss": 0.546,
"step": 4128
},
{
"epoch": 0.26,
"grad_norm": 0.8394411206245422,
"learning_rate": 8.658439636367574e-06,
"loss": 0.5775,
"step": 4129
},
{
"epoch": 0.26,
"grad_norm": 0.9311953186988831,
"learning_rate": 8.657740201252353e-06,
"loss": 0.6699,
"step": 4130
},
{
"epoch": 0.26,
"grad_norm": 0.9466168284416199,
"learning_rate": 8.657040612122418e-06,
"loss": 0.6419,
"step": 4131
},
{
"epoch": 0.26,
"grad_norm": 0.9362534880638123,
"learning_rate": 8.656340869007225e-06,
"loss": 0.6982,
"step": 4132
},
{
"epoch": 0.26,
"grad_norm": 0.9404389262199402,
"learning_rate": 8.655640971936236e-06,
"loss": 0.6242,
"step": 4133
},
{
"epoch": 0.26,
"grad_norm": 0.9012186527252197,
"learning_rate": 8.654940920938922e-06,
"loss": 0.6187,
"step": 4134
},
{
"epoch": 0.26,
"grad_norm": 0.8309886455535889,
"learning_rate": 8.654240716044762e-06,
"loss": 0.6226,
"step": 4135
},
{
"epoch": 0.26,
"grad_norm": 0.9367273449897766,
"learning_rate": 8.653540357283236e-06,
"loss": 0.5919,
"step": 4136
},
{
"epoch": 0.26,
"grad_norm": 0.8980950713157654,
"learning_rate": 8.652839844683836e-06,
"loss": 0.5913,
"step": 4137
},
{
"epoch": 0.26,
"grad_norm": 0.8785884976387024,
"learning_rate": 8.652139178276058e-06,
"loss": 0.6348,
"step": 4138
},
{
"epoch": 0.26,
"grad_norm": 0.8896494507789612,
"learning_rate": 8.651438358089403e-06,
"loss": 0.6578,
"step": 4139
},
{
"epoch": 0.26,
"grad_norm": 0.9590379595756531,
"learning_rate": 8.650737384153382e-06,
"loss": 0.6917,
"step": 4140
},
{
"epoch": 0.26,
"grad_norm": 0.9541071653366089,
"learning_rate": 8.65003625649751e-06,
"loss": 0.582,
"step": 4141
},
{
"epoch": 0.26,
"grad_norm": 0.9491351246833801,
"learning_rate": 8.649334975151307e-06,
"loss": 0.6342,
"step": 4142
},
{
"epoch": 0.26,
"grad_norm": 0.979164183139801,
"learning_rate": 8.648633540144304e-06,
"loss": 0.6439,
"step": 4143
},
{
"epoch": 0.26,
"grad_norm": 0.8879642486572266,
"learning_rate": 8.647931951506037e-06,
"loss": 0.6109,
"step": 4144
},
{
"epoch": 0.26,
"grad_norm": 0.8990030884742737,
"learning_rate": 8.647230209266043e-06,
"loss": 0.6334,
"step": 4145
},
{
"epoch": 0.26,
"grad_norm": 0.9525482654571533,
"learning_rate": 8.646528313453876e-06,
"loss": 0.6203,
"step": 4146
},
{
"epoch": 0.26,
"grad_norm": 0.8282102942466736,
"learning_rate": 8.645826264099085e-06,
"loss": 0.573,
"step": 4147
},
{
"epoch": 0.26,
"grad_norm": 0.8854700922966003,
"learning_rate": 8.645124061231234e-06,
"loss": 0.6247,
"step": 4148
},
{
"epoch": 0.26,
"grad_norm": 0.8921488523483276,
"learning_rate": 8.644421704879889e-06,
"loss": 0.5295,
"step": 4149
},
{
"epoch": 0.26,
"grad_norm": 0.8470342755317688,
"learning_rate": 8.643719195074622e-06,
"loss": 0.5909,
"step": 4150
},
{
"epoch": 0.26,
"grad_norm": 0.8630185127258301,
"learning_rate": 8.643016531845017e-06,
"loss": 0.6125,
"step": 4151
},
{
"epoch": 0.26,
"grad_norm": 1.019774079322815,
"learning_rate": 8.642313715220659e-06,
"loss": 0.6089,
"step": 4152
},
{
"epoch": 0.26,
"grad_norm": 0.8334149122238159,
"learning_rate": 8.641610745231142e-06,
"loss": 0.5966,
"step": 4153
},
{
"epoch": 0.26,
"grad_norm": 0.8783389925956726,
"learning_rate": 8.640907621906062e-06,
"loss": 0.5849,
"step": 4154
},
{
"epoch": 0.26,
"grad_norm": 0.9363436698913574,
"learning_rate": 8.640204345275029e-06,
"loss": 0.6535,
"step": 4155
},
{
"epoch": 0.26,
"grad_norm": 0.9536002278327942,
"learning_rate": 8.639500915367656e-06,
"loss": 0.6491,
"step": 4156
},
{
"epoch": 0.26,
"grad_norm": 0.9206741452217102,
"learning_rate": 8.63879733221356e-06,
"loss": 0.657,
"step": 4157
},
{
"epoch": 0.26,
"grad_norm": 0.943328320980072,
"learning_rate": 8.638093595842366e-06,
"loss": 0.6666,
"step": 4158
},
{
"epoch": 0.26,
"grad_norm": 0.9073593616485596,
"learning_rate": 8.637389706283705e-06,
"loss": 0.5944,
"step": 4159
},
{
"epoch": 0.26,
"grad_norm": 0.9186743497848511,
"learning_rate": 8.636685663567219e-06,
"loss": 0.6469,
"step": 4160
},
{
"epoch": 0.26,
"grad_norm": 0.8272576928138733,
"learning_rate": 8.635981467722552e-06,
"loss": 0.6093,
"step": 4161
},
{
"epoch": 0.26,
"grad_norm": 0.827934741973877,
"learning_rate": 8.635277118779353e-06,
"loss": 0.5911,
"step": 4162
},
{
"epoch": 0.26,
"grad_norm": 0.8880283832550049,
"learning_rate": 8.63457261676728e-06,
"loss": 0.6092,
"step": 4163
},
{
"epoch": 0.26,
"grad_norm": 0.8852022886276245,
"learning_rate": 8.633867961715998e-06,
"loss": 0.5906,
"step": 4164
},
{
"epoch": 0.26,
"grad_norm": 0.8944527506828308,
"learning_rate": 8.633163153655178e-06,
"loss": 0.6314,
"step": 4165
},
{
"epoch": 0.26,
"grad_norm": 0.9245870113372803,
"learning_rate": 8.632458192614495e-06,
"loss": 0.6901,
"step": 4166
},
{
"epoch": 0.26,
"grad_norm": 0.8997650146484375,
"learning_rate": 8.631753078623635e-06,
"loss": 0.5836,
"step": 4167
},
{
"epoch": 0.26,
"grad_norm": 0.935129702091217,
"learning_rate": 8.631047811712288e-06,
"loss": 0.6776,
"step": 4168
},
{
"epoch": 0.26,
"grad_norm": 0.9850293397903442,
"learning_rate": 8.630342391910147e-06,
"loss": 0.6637,
"step": 4169
},
{
"epoch": 0.26,
"grad_norm": 0.9164685010910034,
"learning_rate": 8.629636819246919e-06,
"loss": 0.6207,
"step": 4170
},
{
"epoch": 0.26,
"grad_norm": 0.8634175658226013,
"learning_rate": 8.628931093752308e-06,
"loss": 0.6029,
"step": 4171
},
{
"epoch": 0.26,
"grad_norm": 0.8743361234664917,
"learning_rate": 8.628225215456037e-06,
"loss": 0.6149,
"step": 4172
},
{
"epoch": 0.26,
"grad_norm": 0.9644536972045898,
"learning_rate": 8.627519184387821e-06,
"loss": 0.6623,
"step": 4173
},
{
"epoch": 0.26,
"grad_norm": 0.9518513679504395,
"learning_rate": 8.626813000577393e-06,
"loss": 0.6665,
"step": 4174
},
{
"epoch": 0.26,
"grad_norm": 0.9795065522193909,
"learning_rate": 8.626106664054483e-06,
"loss": 0.6404,
"step": 4175
},
{
"epoch": 0.26,
"grad_norm": 0.8946532011032104,
"learning_rate": 8.62540017484884e-06,
"loss": 0.6109,
"step": 4176
},
{
"epoch": 0.26,
"grad_norm": 0.8872295618057251,
"learning_rate": 8.624693532990205e-06,
"loss": 0.591,
"step": 4177
},
{
"epoch": 0.26,
"grad_norm": 0.9337349534034729,
"learning_rate": 8.623986738508334e-06,
"loss": 0.641,
"step": 4178
},
{
"epoch": 0.26,
"grad_norm": 0.8817663788795471,
"learning_rate": 8.62327979143299e-06,
"loss": 0.5987,
"step": 4179
},
{
"epoch": 0.26,
"grad_norm": 0.9417575001716614,
"learning_rate": 8.622572691793937e-06,
"loss": 0.5693,
"step": 4180
},
{
"epoch": 0.26,
"grad_norm": 0.8882385492324829,
"learning_rate": 8.621865439620952e-06,
"loss": 0.5992,
"step": 4181
},
{
"epoch": 0.26,
"grad_norm": 0.8872155547142029,
"learning_rate": 8.621158034943812e-06,
"loss": 0.6055,
"step": 4182
},
{
"epoch": 0.27,
"grad_norm": 0.8701667189598083,
"learning_rate": 8.620450477792303e-06,
"loss": 0.6059,
"step": 4183
},
{
"epoch": 0.27,
"grad_norm": 0.8833332657814026,
"learning_rate": 8.619742768196221e-06,
"loss": 0.5834,
"step": 4184
},
{
"epoch": 0.27,
"grad_norm": 0.9163500070571899,
"learning_rate": 8.619034906185362e-06,
"loss": 0.6927,
"step": 4185
},
{
"epoch": 0.27,
"grad_norm": 0.9250738620758057,
"learning_rate": 8.618326891789534e-06,
"loss": 0.6408,
"step": 4186
},
{
"epoch": 0.27,
"grad_norm": 0.9231948852539062,
"learning_rate": 8.617618725038545e-06,
"loss": 0.6151,
"step": 4187
},
{
"epoch": 0.27,
"grad_norm": 0.8991936445236206,
"learning_rate": 8.61691040596222e-06,
"loss": 0.6433,
"step": 4188
},
{
"epoch": 0.27,
"grad_norm": 0.9138967990875244,
"learning_rate": 8.616201934590379e-06,
"loss": 0.6513,
"step": 4189
},
{
"epoch": 0.27,
"grad_norm": 0.9194620251655579,
"learning_rate": 8.615493310952852e-06,
"loss": 0.6536,
"step": 4190
},
{
"epoch": 0.27,
"grad_norm": 0.888721227645874,
"learning_rate": 8.614784535079482e-06,
"loss": 0.606,
"step": 4191
},
{
"epoch": 0.27,
"grad_norm": 0.9047959446907043,
"learning_rate": 8.614075607000108e-06,
"loss": 0.6485,
"step": 4192
},
{
"epoch": 0.27,
"grad_norm": 0.9056040644645691,
"learning_rate": 8.613366526744584e-06,
"loss": 0.5843,
"step": 4193
},
{
"epoch": 0.27,
"grad_norm": 0.9224606156349182,
"learning_rate": 8.612657294342765e-06,
"loss": 0.5978,
"step": 4194
},
{
"epoch": 0.27,
"grad_norm": 0.9035705327987671,
"learning_rate": 8.611947909824514e-06,
"loss": 0.651,
"step": 4195
},
{
"epoch": 0.27,
"grad_norm": 0.8923839330673218,
"learning_rate": 8.611238373219703e-06,
"loss": 0.5926,
"step": 4196
},
{
"epoch": 0.27,
"grad_norm": 0.9223050475120544,
"learning_rate": 8.610528684558206e-06,
"loss": 0.5893,
"step": 4197
},
{
"epoch": 0.27,
"grad_norm": 0.9211618900299072,
"learning_rate": 8.609818843869907e-06,
"loss": 0.6018,
"step": 4198
},
{
"epoch": 0.27,
"grad_norm": 0.8177082538604736,
"learning_rate": 8.609108851184693e-06,
"loss": 0.587,
"step": 4199
},
{
"epoch": 0.27,
"grad_norm": 0.8298165202140808,
"learning_rate": 8.608398706532462e-06,
"loss": 0.6308,
"step": 4200
},
{
"epoch": 0.27,
"grad_norm": 0.8628758192062378,
"learning_rate": 8.607688409943112e-06,
"loss": 0.5662,
"step": 4201
},
{
"epoch": 0.27,
"grad_norm": 0.8658290505409241,
"learning_rate": 8.606977961446554e-06,
"loss": 0.6113,
"step": 4202
},
{
"epoch": 0.27,
"grad_norm": 0.9051910638809204,
"learning_rate": 8.606267361072704e-06,
"loss": 0.6256,
"step": 4203
},
{
"epoch": 0.27,
"grad_norm": 0.8783097267150879,
"learning_rate": 8.605556608851478e-06,
"loss": 0.6607,
"step": 4204
},
{
"epoch": 0.27,
"grad_norm": 0.9676861763000488,
"learning_rate": 8.604845704812808e-06,
"loss": 0.6564,
"step": 4205
},
{
"epoch": 0.27,
"grad_norm": 0.9138243198394775,
"learning_rate": 8.604134648986625e-06,
"loss": 0.5926,
"step": 4206
},
{
"epoch": 0.27,
"grad_norm": 0.9041840434074402,
"learning_rate": 8.603423441402868e-06,
"loss": 0.6202,
"step": 4207
},
{
"epoch": 0.27,
"grad_norm": 0.8703333735466003,
"learning_rate": 8.602712082091487e-06,
"loss": 0.573,
"step": 4208
},
{
"epoch": 0.27,
"grad_norm": 0.9118040204048157,
"learning_rate": 8.602000571082432e-06,
"loss": 0.6348,
"step": 4209
},
{
"epoch": 0.27,
"grad_norm": 0.9517326354980469,
"learning_rate": 8.601288908405665e-06,
"loss": 0.622,
"step": 4210
},
{
"epoch": 0.27,
"grad_norm": 0.9293259978294373,
"learning_rate": 8.60057709409115e-06,
"loss": 0.6272,
"step": 4211
},
{
"epoch": 0.27,
"grad_norm": 0.8603157997131348,
"learning_rate": 8.599865128168858e-06,
"loss": 0.5833,
"step": 4212
},
{
"epoch": 0.27,
"grad_norm": 0.8905279040336609,
"learning_rate": 8.599153010668768e-06,
"loss": 0.5917,
"step": 4213
},
{
"epoch": 0.27,
"grad_norm": 0.9047275185585022,
"learning_rate": 8.598440741620868e-06,
"loss": 0.6405,
"step": 4214
},
{
"epoch": 0.27,
"grad_norm": 0.8636517524719238,
"learning_rate": 8.597728321055144e-06,
"loss": 0.5763,
"step": 4215
},
{
"epoch": 0.27,
"grad_norm": 0.8629072904586792,
"learning_rate": 8.597015749001596e-06,
"loss": 0.6013,
"step": 4216
},
{
"epoch": 0.27,
"grad_norm": 0.8857645988464355,
"learning_rate": 8.59630302549023e-06,
"loss": 0.6191,
"step": 4217
},
{
"epoch": 0.27,
"grad_norm": 0.9491539597511292,
"learning_rate": 8.595590150551052e-06,
"loss": 0.6271,
"step": 4218
},
{
"epoch": 0.27,
"grad_norm": 0.9557621479034424,
"learning_rate": 8.59487712421408e-06,
"loss": 0.6135,
"step": 4219
},
{
"epoch": 0.27,
"grad_norm": 0.9056437611579895,
"learning_rate": 8.594163946509339e-06,
"loss": 0.6211,
"step": 4220
},
{
"epoch": 0.27,
"grad_norm": 0.8638589978218079,
"learning_rate": 8.593450617466859e-06,
"loss": 0.5999,
"step": 4221
},
{
"epoch": 0.27,
"grad_norm": 0.9568116664886475,
"learning_rate": 8.592737137116673e-06,
"loss": 0.6038,
"step": 4222
},
{
"epoch": 0.27,
"grad_norm": 0.9060722589492798,
"learning_rate": 8.592023505488825e-06,
"loss": 0.6373,
"step": 4223
},
{
"epoch": 0.27,
"grad_norm": 0.7833012342453003,
"learning_rate": 8.591309722613362e-06,
"loss": 0.569,
"step": 4224
},
{
"epoch": 0.27,
"grad_norm": 0.9138297438621521,
"learning_rate": 8.590595788520342e-06,
"loss": 0.5829,
"step": 4225
},
{
"epoch": 0.27,
"grad_norm": 0.8410037755966187,
"learning_rate": 8.589881703239821e-06,
"loss": 0.5491,
"step": 4226
},
{
"epoch": 0.27,
"grad_norm": 0.8916024565696716,
"learning_rate": 8.58916746680187e-06,
"loss": 0.6094,
"step": 4227
},
{
"epoch": 0.27,
"grad_norm": 0.9920042157173157,
"learning_rate": 8.588453079236565e-06,
"loss": 0.6644,
"step": 4228
},
{
"epoch": 0.27,
"grad_norm": 0.9212594032287598,
"learning_rate": 8.587738540573984e-06,
"loss": 0.5878,
"step": 4229
},
{
"epoch": 0.27,
"grad_norm": 0.8286495804786682,
"learning_rate": 8.587023850844212e-06,
"loss": 0.6002,
"step": 4230
},
{
"epoch": 0.27,
"grad_norm": 0.8914030194282532,
"learning_rate": 8.586309010077345e-06,
"loss": 0.6672,
"step": 4231
},
{
"epoch": 0.27,
"grad_norm": 0.8013595342636108,
"learning_rate": 8.585594018303482e-06,
"loss": 0.6138,
"step": 4232
},
{
"epoch": 0.27,
"grad_norm": 0.8565639853477478,
"learning_rate": 8.584878875552727e-06,
"loss": 0.6073,
"step": 4233
},
{
"epoch": 0.27,
"grad_norm": 0.818520188331604,
"learning_rate": 8.584163581855194e-06,
"loss": 0.6158,
"step": 4234
},
{
"epoch": 0.27,
"grad_norm": 0.9362378120422363,
"learning_rate": 8.583448137241002e-06,
"loss": 0.629,
"step": 4235
},
{
"epoch": 0.27,
"grad_norm": 0.9456666111946106,
"learning_rate": 8.582732541740273e-06,
"loss": 0.617,
"step": 4236
},
{
"epoch": 0.27,
"grad_norm": 0.8908970952033997,
"learning_rate": 8.582016795383142e-06,
"loss": 0.5931,
"step": 4237
},
{
"epoch": 0.27,
"grad_norm": 0.8807900547981262,
"learning_rate": 8.581300898199743e-06,
"loss": 0.5685,
"step": 4238
},
{
"epoch": 0.27,
"grad_norm": 0.8527096509933472,
"learning_rate": 8.580584850220222e-06,
"loss": 0.6016,
"step": 4239
},
{
"epoch": 0.27,
"grad_norm": 0.942776620388031,
"learning_rate": 8.57986865147473e-06,
"loss": 0.5871,
"step": 4240
},
{
"epoch": 0.27,
"grad_norm": 0.9495031237602234,
"learning_rate": 8.57915230199342e-06,
"loss": 0.6078,
"step": 4241
},
{
"epoch": 0.27,
"grad_norm": 0.9065079092979431,
"learning_rate": 8.578435801806461e-06,
"loss": 0.6451,
"step": 4242
},
{
"epoch": 0.27,
"grad_norm": 0.8677025437355042,
"learning_rate": 8.577719150944017e-06,
"loss": 0.6228,
"step": 4243
},
{
"epoch": 0.27,
"grad_norm": 0.9314882755279541,
"learning_rate": 8.577002349436264e-06,
"loss": 0.5969,
"step": 4244
},
{
"epoch": 0.27,
"grad_norm": 1.0232270956039429,
"learning_rate": 8.57628539731339e-06,
"loss": 0.6652,
"step": 4245
},
{
"epoch": 0.27,
"grad_norm": 0.8840213418006897,
"learning_rate": 8.575568294605574e-06,
"loss": 0.6591,
"step": 4246
},
{
"epoch": 0.27,
"grad_norm": 0.935551643371582,
"learning_rate": 8.574851041343018e-06,
"loss": 0.5936,
"step": 4247
},
{
"epoch": 0.27,
"grad_norm": 0.9176490902900696,
"learning_rate": 8.574133637555921e-06,
"loss": 0.6103,
"step": 4248
},
{
"epoch": 0.27,
"grad_norm": 0.8537380695343018,
"learning_rate": 8.57341608327449e-06,
"loss": 0.5831,
"step": 4249
},
{
"epoch": 0.27,
"grad_norm": 0.8898982405662537,
"learning_rate": 8.572698378528937e-06,
"loss": 0.6522,
"step": 4250
},
{
"epoch": 0.27,
"grad_norm": 0.8428791761398315,
"learning_rate": 8.571980523349485e-06,
"loss": 0.6097,
"step": 4251
},
{
"epoch": 0.27,
"grad_norm": 0.9399141669273376,
"learning_rate": 8.57126251776636e-06,
"loss": 0.6514,
"step": 4252
},
{
"epoch": 0.27,
"grad_norm": 0.9143974781036377,
"learning_rate": 8.570544361809792e-06,
"loss": 0.6807,
"step": 4253
},
{
"epoch": 0.27,
"grad_norm": 0.9048095941543579,
"learning_rate": 8.569826055510025e-06,
"loss": 0.5986,
"step": 4254
},
{
"epoch": 0.27,
"grad_norm": 0.8654329776763916,
"learning_rate": 8.569107598897296e-06,
"loss": 0.5274,
"step": 4255
},
{
"epoch": 0.27,
"grad_norm": 0.9597179293632507,
"learning_rate": 8.568388992001868e-06,
"loss": 0.5958,
"step": 4256
},
{
"epoch": 0.27,
"grad_norm": 0.8860706090927124,
"learning_rate": 8.56767023485399e-06,
"loss": 0.5915,
"step": 4257
},
{
"epoch": 0.27,
"grad_norm": 0.8715736269950867,
"learning_rate": 8.56695132748393e-06,
"loss": 0.6533,
"step": 4258
},
{
"epoch": 0.27,
"grad_norm": 0.9161938428878784,
"learning_rate": 8.566232269921957e-06,
"loss": 0.7043,
"step": 4259
},
{
"epoch": 0.27,
"grad_norm": 0.8063193559646606,
"learning_rate": 8.565513062198351e-06,
"loss": 0.6129,
"step": 4260
},
{
"epoch": 0.27,
"grad_norm": 0.9243603944778442,
"learning_rate": 8.564793704343392e-06,
"loss": 0.5744,
"step": 4261
},
{
"epoch": 0.27,
"grad_norm": 0.9865625500679016,
"learning_rate": 8.564074196387371e-06,
"loss": 0.6796,
"step": 4262
},
{
"epoch": 0.27,
"grad_norm": 0.8942394256591797,
"learning_rate": 8.563354538360585e-06,
"loss": 0.6083,
"step": 4263
},
{
"epoch": 0.27,
"grad_norm": 0.9242582321166992,
"learning_rate": 8.562634730293335e-06,
"loss": 0.5982,
"step": 4264
},
{
"epoch": 0.27,
"grad_norm": 0.9217408895492554,
"learning_rate": 8.56191477221593e-06,
"loss": 0.6569,
"step": 4265
},
{
"epoch": 0.27,
"grad_norm": 0.9580654501914978,
"learning_rate": 8.561194664158685e-06,
"loss": 0.6733,
"step": 4266
},
{
"epoch": 0.27,
"grad_norm": 0.9393530488014221,
"learning_rate": 8.560474406151921e-06,
"loss": 0.668,
"step": 4267
},
{
"epoch": 0.27,
"grad_norm": 0.9454185962677002,
"learning_rate": 8.559753998225965e-06,
"loss": 0.6592,
"step": 4268
},
{
"epoch": 0.27,
"grad_norm": 0.9573476910591125,
"learning_rate": 8.559033440411155e-06,
"loss": 0.5933,
"step": 4269
},
{
"epoch": 0.27,
"grad_norm": 0.8191101551055908,
"learning_rate": 8.558312732737825e-06,
"loss": 0.5713,
"step": 4270
},
{
"epoch": 0.27,
"grad_norm": 0.8455954790115356,
"learning_rate": 8.557591875236323e-06,
"loss": 0.5984,
"step": 4271
},
{
"epoch": 0.27,
"grad_norm": 0.8731233477592468,
"learning_rate": 8.556870867937006e-06,
"loss": 0.5876,
"step": 4272
},
{
"epoch": 0.27,
"grad_norm": 0.8729333281517029,
"learning_rate": 8.55614971087023e-06,
"loss": 0.6102,
"step": 4273
},
{
"epoch": 0.27,
"grad_norm": 0.9293901324272156,
"learning_rate": 8.555428404066359e-06,
"loss": 0.6141,
"step": 4274
},
{
"epoch": 0.27,
"grad_norm": 0.8134398460388184,
"learning_rate": 8.554706947555766e-06,
"loss": 0.5814,
"step": 4275
},
{
"epoch": 0.27,
"grad_norm": 0.9086621999740601,
"learning_rate": 8.553985341368832e-06,
"loss": 0.6756,
"step": 4276
},
{
"epoch": 0.27,
"grad_norm": 0.8340302109718323,
"learning_rate": 8.553263585535937e-06,
"loss": 0.6272,
"step": 4277
},
{
"epoch": 0.27,
"grad_norm": 0.9644330143928528,
"learning_rate": 8.552541680087472e-06,
"loss": 0.611,
"step": 4278
},
{
"epoch": 0.27,
"grad_norm": 0.9474432468414307,
"learning_rate": 8.551819625053837e-06,
"loss": 0.6581,
"step": 4279
},
{
"epoch": 0.27,
"grad_norm": 0.8727615475654602,
"learning_rate": 8.551097420465432e-06,
"loss": 0.6059,
"step": 4280
},
{
"epoch": 0.27,
"grad_norm": 0.9292715191841125,
"learning_rate": 8.55037506635267e-06,
"loss": 0.5987,
"step": 4281
},
{
"epoch": 0.27,
"grad_norm": 0.8968216180801392,
"learning_rate": 8.549652562745963e-06,
"loss": 0.6109,
"step": 4282
},
{
"epoch": 0.27,
"grad_norm": 0.919104278087616,
"learning_rate": 8.548929909675736e-06,
"loss": 0.6043,
"step": 4283
},
{
"epoch": 0.27,
"grad_norm": 0.963595449924469,
"learning_rate": 8.548207107172417e-06,
"loss": 0.6421,
"step": 4284
},
{
"epoch": 0.27,
"grad_norm": 0.9195282459259033,
"learning_rate": 8.547484155266439e-06,
"loss": 0.6284,
"step": 4285
},
{
"epoch": 0.27,
"grad_norm": 0.9050331711769104,
"learning_rate": 8.546761053988244e-06,
"loss": 0.6787,
"step": 4286
},
{
"epoch": 0.27,
"grad_norm": 0.8294732570648193,
"learning_rate": 8.546037803368279e-06,
"loss": 0.5982,
"step": 4287
},
{
"epoch": 0.27,
"grad_norm": 0.8532490134239197,
"learning_rate": 8.545314403436998e-06,
"loss": 0.5664,
"step": 4288
},
{
"epoch": 0.27,
"grad_norm": 0.9732022881507874,
"learning_rate": 8.54459085422486e-06,
"loss": 0.6,
"step": 4289
},
{
"epoch": 0.27,
"grad_norm": 0.9613706469535828,
"learning_rate": 8.543867155762335e-06,
"loss": 0.6525,
"step": 4290
},
{
"epoch": 0.27,
"grad_norm": 0.9835689663887024,
"learning_rate": 8.543143308079888e-06,
"loss": 0.6368,
"step": 4291
},
{
"epoch": 0.27,
"grad_norm": 0.857182502746582,
"learning_rate": 8.542419311208006e-06,
"loss": 0.6265,
"step": 4292
},
{
"epoch": 0.27,
"grad_norm": 0.8491384983062744,
"learning_rate": 8.541695165177169e-06,
"loss": 0.664,
"step": 4293
},
{
"epoch": 0.27,
"grad_norm": 0.9267544150352478,
"learning_rate": 8.540970870017867e-06,
"loss": 0.6202,
"step": 4294
},
{
"epoch": 0.27,
"grad_norm": 0.9041336178779602,
"learning_rate": 8.540246425760602e-06,
"loss": 0.5934,
"step": 4295
},
{
"epoch": 0.27,
"grad_norm": 0.9102574586868286,
"learning_rate": 8.539521832435874e-06,
"loss": 0.5931,
"step": 4296
},
{
"epoch": 0.27,
"grad_norm": 0.8750420212745667,
"learning_rate": 8.538797090074196e-06,
"loss": 0.6128,
"step": 4297
},
{
"epoch": 0.27,
"grad_norm": 0.9216861724853516,
"learning_rate": 8.538072198706081e-06,
"loss": 0.6311,
"step": 4298
},
{
"epoch": 0.27,
"grad_norm": 0.8805850744247437,
"learning_rate": 8.537347158362056e-06,
"loss": 0.58,
"step": 4299
},
{
"epoch": 0.27,
"grad_norm": 0.8909803032875061,
"learning_rate": 8.536621969072648e-06,
"loss": 0.607,
"step": 4300
},
{
"epoch": 0.27,
"grad_norm": 0.9267565608024597,
"learning_rate": 8.53589663086839e-06,
"loss": 0.6457,
"step": 4301
},
{
"epoch": 0.27,
"grad_norm": 0.9968888759613037,
"learning_rate": 8.535171143779828e-06,
"loss": 0.6252,
"step": 4302
},
{
"epoch": 0.27,
"grad_norm": 0.8970872163772583,
"learning_rate": 8.534445507837505e-06,
"loss": 0.6065,
"step": 4303
},
{
"epoch": 0.27,
"grad_norm": 0.9261126518249512,
"learning_rate": 8.533719723071979e-06,
"loss": 0.6377,
"step": 4304
},
{
"epoch": 0.27,
"grad_norm": 0.9060932993888855,
"learning_rate": 8.532993789513805e-06,
"loss": 0.6167,
"step": 4305
},
{
"epoch": 0.27,
"grad_norm": 0.9795500636100769,
"learning_rate": 8.532267707193555e-06,
"loss": 0.6384,
"step": 4306
},
{
"epoch": 0.27,
"grad_norm": 0.8952150940895081,
"learning_rate": 8.5315414761418e-06,
"loss": 0.6448,
"step": 4307
},
{
"epoch": 0.27,
"grad_norm": 0.9257222414016724,
"learning_rate": 8.530815096389118e-06,
"loss": 0.5725,
"step": 4308
},
{
"epoch": 0.27,
"grad_norm": 0.871077299118042,
"learning_rate": 8.530088567966095e-06,
"loss": 0.6262,
"step": 4309
},
{
"epoch": 0.27,
"grad_norm": 0.8593372702598572,
"learning_rate": 8.529361890903323e-06,
"loss": 0.5855,
"step": 4310
},
{
"epoch": 0.27,
"grad_norm": 0.9580448865890503,
"learning_rate": 8.5286350652314e-06,
"loss": 0.6397,
"step": 4311
},
{
"epoch": 0.27,
"grad_norm": 0.8802589774131775,
"learning_rate": 8.527908090980929e-06,
"loss": 0.6593,
"step": 4312
},
{
"epoch": 0.27,
"grad_norm": 0.9041280746459961,
"learning_rate": 8.527180968182522e-06,
"loss": 0.5961,
"step": 4313
},
{
"epoch": 0.27,
"grad_norm": 0.8729889988899231,
"learning_rate": 8.526453696866794e-06,
"loss": 0.6,
"step": 4314
},
{
"epoch": 0.27,
"grad_norm": 0.8576443195343018,
"learning_rate": 8.525726277064368e-06,
"loss": 0.5911,
"step": 4315
},
{
"epoch": 0.27,
"grad_norm": 0.8359036445617676,
"learning_rate": 8.524998708805874e-06,
"loss": 0.5723,
"step": 4316
},
{
"epoch": 0.27,
"grad_norm": 0.8947839736938477,
"learning_rate": 8.524270992121948e-06,
"loss": 0.6163,
"step": 4317
},
{
"epoch": 0.27,
"grad_norm": 0.9303499460220337,
"learning_rate": 8.523543127043228e-06,
"loss": 0.6144,
"step": 4318
},
{
"epoch": 0.27,
"grad_norm": 0.8773894309997559,
"learning_rate": 8.522815113600366e-06,
"loss": 0.5884,
"step": 4319
},
{
"epoch": 0.27,
"grad_norm": 0.9222464561462402,
"learning_rate": 8.522086951824014e-06,
"loss": 0.6819,
"step": 4320
},
{
"epoch": 0.27,
"grad_norm": 0.8709927797317505,
"learning_rate": 8.521358641744834e-06,
"loss": 0.5886,
"step": 4321
},
{
"epoch": 0.27,
"grad_norm": 0.8806871175765991,
"learning_rate": 8.520630183393492e-06,
"loss": 0.616,
"step": 4322
},
{
"epoch": 0.27,
"grad_norm": 0.9203693866729736,
"learning_rate": 8.519901576800657e-06,
"loss": 0.6442,
"step": 4323
},
{
"epoch": 0.27,
"grad_norm": 0.9157525300979614,
"learning_rate": 8.519172821997015e-06,
"loss": 0.57,
"step": 4324
},
{
"epoch": 0.27,
"grad_norm": 0.8757469058036804,
"learning_rate": 8.518443919013247e-06,
"loss": 0.625,
"step": 4325
},
{
"epoch": 0.27,
"grad_norm": 0.8523043394088745,
"learning_rate": 8.517714867880044e-06,
"loss": 0.5748,
"step": 4326
},
{
"epoch": 0.27,
"grad_norm": 0.8662055730819702,
"learning_rate": 8.516985668628105e-06,
"loss": 0.5595,
"step": 4327
},
{
"epoch": 0.27,
"grad_norm": 0.8649899363517761,
"learning_rate": 8.516256321288136e-06,
"loss": 0.5697,
"step": 4328
},
{
"epoch": 0.27,
"grad_norm": 0.8986943960189819,
"learning_rate": 8.515526825890845e-06,
"loss": 0.5607,
"step": 4329
},
{
"epoch": 0.27,
"grad_norm": 0.9603455066680908,
"learning_rate": 8.514797182466948e-06,
"loss": 0.5942,
"step": 4330
},
{
"epoch": 0.27,
"grad_norm": 0.9389190673828125,
"learning_rate": 8.51406739104717e-06,
"loss": 0.6389,
"step": 4331
},
{
"epoch": 0.27,
"grad_norm": 0.9618402123451233,
"learning_rate": 8.513337451662238e-06,
"loss": 0.6588,
"step": 4332
},
{
"epoch": 0.27,
"grad_norm": 0.9515010118484497,
"learning_rate": 8.512607364342887e-06,
"loss": 0.6097,
"step": 4333
},
{
"epoch": 0.27,
"grad_norm": 0.8656193017959595,
"learning_rate": 8.51187712911986e-06,
"loss": 0.597,
"step": 4334
},
{
"epoch": 0.27,
"grad_norm": 0.9110217094421387,
"learning_rate": 8.511146746023905e-06,
"loss": 0.5888,
"step": 4335
},
{
"epoch": 0.27,
"grad_norm": 0.8885056376457214,
"learning_rate": 8.510416215085775e-06,
"loss": 0.6293,
"step": 4336
},
{
"epoch": 0.27,
"grad_norm": 0.8254531621932983,
"learning_rate": 8.509685536336229e-06,
"loss": 0.5644,
"step": 4337
},
{
"epoch": 0.27,
"grad_norm": 0.8862583041191101,
"learning_rate": 8.508954709806034e-06,
"loss": 0.633,
"step": 4338
},
{
"epoch": 0.27,
"grad_norm": 0.9127135872840881,
"learning_rate": 8.508223735525963e-06,
"loss": 0.624,
"step": 4339
},
{
"epoch": 0.27,
"grad_norm": 0.9787098169326782,
"learning_rate": 8.507492613526795e-06,
"loss": 0.6342,
"step": 4340
},
{
"epoch": 0.28,
"grad_norm": 0.844140887260437,
"learning_rate": 8.506761343839316e-06,
"loss": 0.6042,
"step": 4341
},
{
"epoch": 0.28,
"grad_norm": 0.9551699757575989,
"learning_rate": 8.506029926494315e-06,
"loss": 0.6294,
"step": 4342
},
{
"epoch": 0.28,
"grad_norm": 0.8815372586250305,
"learning_rate": 8.50529836152259e-06,
"loss": 0.678,
"step": 4343
},
{
"epoch": 0.28,
"grad_norm": 0.841645359992981,
"learning_rate": 8.504566648954947e-06,
"loss": 0.5792,
"step": 4344
},
{
"epoch": 0.28,
"grad_norm": 0.8906237483024597,
"learning_rate": 8.503834788822191e-06,
"loss": 0.6074,
"step": 4345
},
{
"epoch": 0.28,
"grad_norm": 0.871210515499115,
"learning_rate": 8.503102781155141e-06,
"loss": 0.5929,
"step": 4346
},
{
"epoch": 0.28,
"grad_norm": 0.823668897151947,
"learning_rate": 8.502370625984622e-06,
"loss": 0.5886,
"step": 4347
},
{
"epoch": 0.28,
"grad_norm": 0.9484293460845947,
"learning_rate": 8.501638323341459e-06,
"loss": 0.6557,
"step": 4348
},
{
"epoch": 0.28,
"grad_norm": 0.8655977249145508,
"learning_rate": 8.500905873256486e-06,
"loss": 0.5899,
"step": 4349
},
{
"epoch": 0.28,
"grad_norm": 0.9463286399841309,
"learning_rate": 8.500173275760546e-06,
"loss": 0.6128,
"step": 4350
},
{
"epoch": 0.28,
"grad_norm": 0.8562267422676086,
"learning_rate": 8.499440530884486e-06,
"loss": 0.5932,
"step": 4351
},
{
"epoch": 0.28,
"grad_norm": 0.9182244539260864,
"learning_rate": 8.498707638659159e-06,
"loss": 0.6024,
"step": 4352
},
{
"epoch": 0.28,
"grad_norm": 0.8319056034088135,
"learning_rate": 8.497974599115424e-06,
"loss": 0.5626,
"step": 4353
},
{
"epoch": 0.28,
"grad_norm": 0.9287349581718445,
"learning_rate": 8.497241412284147e-06,
"loss": 0.6092,
"step": 4354
},
{
"epoch": 0.28,
"grad_norm": 0.8886022567749023,
"learning_rate": 8.496508078196202e-06,
"loss": 0.6414,
"step": 4355
},
{
"epoch": 0.28,
"grad_norm": 0.916700005531311,
"learning_rate": 8.495774596882462e-06,
"loss": 0.5731,
"step": 4356
},
{
"epoch": 0.28,
"grad_norm": 0.8386786580085754,
"learning_rate": 8.495040968373817e-06,
"loss": 0.6356,
"step": 4357
},
{
"epoch": 0.28,
"grad_norm": 0.8589484095573425,
"learning_rate": 8.494307192701154e-06,
"loss": 0.5783,
"step": 4358
},
{
"epoch": 0.28,
"grad_norm": 0.882973849773407,
"learning_rate": 8.493573269895372e-06,
"loss": 0.5763,
"step": 4359
},
{
"epoch": 0.28,
"grad_norm": 0.8396306037902832,
"learning_rate": 8.492839199987373e-06,
"loss": 0.5836,
"step": 4360
},
{
"epoch": 0.28,
"grad_norm": 0.8653340935707092,
"learning_rate": 8.492104983008065e-06,
"loss": 0.5815,
"step": 4361
},
{
"epoch": 0.28,
"grad_norm": 0.8777982592582703,
"learning_rate": 8.491370618988367e-06,
"loss": 0.5753,
"step": 4362
},
{
"epoch": 0.28,
"grad_norm": 0.9289289116859436,
"learning_rate": 8.490636107959194e-06,
"loss": 0.5963,
"step": 4363
},
{
"epoch": 0.28,
"grad_norm": 0.9735289216041565,
"learning_rate": 8.489901449951478e-06,
"loss": 0.6477,
"step": 4364
},
{
"epoch": 0.28,
"grad_norm": 0.9543069005012512,
"learning_rate": 8.489166644996154e-06,
"loss": 0.6315,
"step": 4365
},
{
"epoch": 0.28,
"grad_norm": 0.9369723200798035,
"learning_rate": 8.48843169312416e-06,
"loss": 0.6351,
"step": 4366
},
{
"epoch": 0.28,
"grad_norm": 0.9586085677146912,
"learning_rate": 8.487696594366444e-06,
"loss": 0.6317,
"step": 4367
},
{
"epoch": 0.28,
"grad_norm": 0.8085949420928955,
"learning_rate": 8.486961348753954e-06,
"loss": 0.5292,
"step": 4368
},
{
"epoch": 0.28,
"grad_norm": 0.9245449900627136,
"learning_rate": 8.486225956317655e-06,
"loss": 0.6529,
"step": 4369
},
{
"epoch": 0.28,
"grad_norm": 0.8826268315315247,
"learning_rate": 8.48549041708851e-06,
"loss": 0.5822,
"step": 4370
},
{
"epoch": 0.28,
"grad_norm": 0.8296921253204346,
"learning_rate": 8.484754731097484e-06,
"loss": 0.565,
"step": 4371
},
{
"epoch": 0.28,
"grad_norm": 0.8971067667007446,
"learning_rate": 8.484018898375561e-06,
"loss": 0.606,
"step": 4372
},
{
"epoch": 0.28,
"grad_norm": 0.8723403215408325,
"learning_rate": 8.483282918953723e-06,
"loss": 0.6579,
"step": 4373
},
{
"epoch": 0.28,
"grad_norm": 0.9097625613212585,
"learning_rate": 8.482546792862957e-06,
"loss": 0.6365,
"step": 4374
},
{
"epoch": 0.28,
"grad_norm": 0.8853545784950256,
"learning_rate": 8.481810520134262e-06,
"loss": 0.5961,
"step": 4375
},
{
"epoch": 0.28,
"grad_norm": 0.8926584124565125,
"learning_rate": 8.481074100798638e-06,
"loss": 0.6374,
"step": 4376
},
{
"epoch": 0.28,
"grad_norm": 0.9190264940261841,
"learning_rate": 8.480337534887093e-06,
"loss": 0.6332,
"step": 4377
},
{
"epoch": 0.28,
"grad_norm": 0.9103266596794128,
"learning_rate": 8.479600822430642e-06,
"loss": 0.6575,
"step": 4378
},
{
"epoch": 0.28,
"grad_norm": 0.8518051505088806,
"learning_rate": 8.478863963460306e-06,
"loss": 0.5637,
"step": 4379
},
{
"epoch": 0.28,
"grad_norm": 0.8869740962982178,
"learning_rate": 8.478126958007108e-06,
"loss": 0.6089,
"step": 4380
},
{
"epoch": 0.28,
"grad_norm": 0.8450909852981567,
"learning_rate": 8.477389806102085e-06,
"loss": 0.6446,
"step": 4381
},
{
"epoch": 0.28,
"grad_norm": 0.9005980491638184,
"learning_rate": 8.476652507776274e-06,
"loss": 0.5715,
"step": 4382
},
{
"epoch": 0.28,
"grad_norm": 0.8654862642288208,
"learning_rate": 8.475915063060721e-06,
"loss": 0.6625,
"step": 4383
},
{
"epoch": 0.28,
"grad_norm": 0.9093218445777893,
"learning_rate": 8.475177471986476e-06,
"loss": 0.6045,
"step": 4384
},
{
"epoch": 0.28,
"grad_norm": 0.9266924858093262,
"learning_rate": 8.474439734584597e-06,
"loss": 0.611,
"step": 4385
},
{
"epoch": 0.28,
"grad_norm": 0.9059037566184998,
"learning_rate": 8.473701850886147e-06,
"loss": 0.6082,
"step": 4386
},
{
"epoch": 0.28,
"grad_norm": 0.8820655941963196,
"learning_rate": 8.472963820922195e-06,
"loss": 0.5618,
"step": 4387
},
{
"epoch": 0.28,
"grad_norm": 0.9292760491371155,
"learning_rate": 8.47222564472382e-06,
"loss": 0.636,
"step": 4388
},
{
"epoch": 0.28,
"grad_norm": 0.8835957050323486,
"learning_rate": 8.471487322322101e-06,
"loss": 0.5778,
"step": 4389
},
{
"epoch": 0.28,
"grad_norm": 0.8266465067863464,
"learning_rate": 8.47074885374813e-06,
"loss": 0.6343,
"step": 4390
},
{
"epoch": 0.28,
"grad_norm": 0.894709587097168,
"learning_rate": 8.470010239032995e-06,
"loss": 0.6356,
"step": 4391
},
{
"epoch": 0.28,
"grad_norm": 0.8928598761558533,
"learning_rate": 8.469271478207801e-06,
"loss": 0.5714,
"step": 4392
},
{
"epoch": 0.28,
"grad_norm": 0.8108189702033997,
"learning_rate": 8.468532571303655e-06,
"loss": 0.5671,
"step": 4393
},
{
"epoch": 0.28,
"grad_norm": 0.9048933386802673,
"learning_rate": 8.467793518351668e-06,
"loss": 0.6443,
"step": 4394
},
{
"epoch": 0.28,
"grad_norm": 0.9767211675643921,
"learning_rate": 8.46705431938296e-06,
"loss": 0.6349,
"step": 4395
},
{
"epoch": 0.28,
"grad_norm": 0.8677191138267517,
"learning_rate": 8.466314974428655e-06,
"loss": 0.6328,
"step": 4396
},
{
"epoch": 0.28,
"grad_norm": 0.8989687561988831,
"learning_rate": 8.465575483519883e-06,
"loss": 0.5977,
"step": 4397
},
{
"epoch": 0.28,
"grad_norm": 0.8818314075469971,
"learning_rate": 8.464835846687786e-06,
"loss": 0.6441,
"step": 4398
},
{
"epoch": 0.28,
"grad_norm": 0.8356281518936157,
"learning_rate": 8.464096063963503e-06,
"loss": 0.5723,
"step": 4399
},
{
"epoch": 0.28,
"grad_norm": 0.9221736192703247,
"learning_rate": 8.463356135378187e-06,
"loss": 0.5863,
"step": 4400
},
{
"epoch": 0.28,
"grad_norm": 0.9067344069480896,
"learning_rate": 8.462616060962992e-06,
"loss": 0.6029,
"step": 4401
},
{
"epoch": 0.28,
"grad_norm": 0.9068452715873718,
"learning_rate": 8.46187584074908e-06,
"loss": 0.6686,
"step": 4402
},
{
"epoch": 0.28,
"grad_norm": 0.8604983687400818,
"learning_rate": 8.461135474767618e-06,
"loss": 0.6051,
"step": 4403
},
{
"epoch": 0.28,
"grad_norm": 0.969758152961731,
"learning_rate": 8.460394963049784e-06,
"loss": 0.6334,
"step": 4404
},
{
"epoch": 0.28,
"grad_norm": 0.8745808005332947,
"learning_rate": 8.459654305626754e-06,
"loss": 0.6052,
"step": 4405
},
{
"epoch": 0.28,
"grad_norm": 0.8724889755249023,
"learning_rate": 8.458913502529718e-06,
"loss": 0.6038,
"step": 4406
},
{
"epoch": 0.28,
"grad_norm": 0.977708101272583,
"learning_rate": 8.458172553789866e-06,
"loss": 0.646,
"step": 4407
},
{
"epoch": 0.28,
"grad_norm": 0.900845468044281,
"learning_rate": 8.457431459438398e-06,
"loss": 0.6228,
"step": 4408
},
{
"epoch": 0.28,
"grad_norm": 0.9241088032722473,
"learning_rate": 8.456690219506519e-06,
"loss": 0.5887,
"step": 4409
},
{
"epoch": 0.28,
"grad_norm": 0.8947976231575012,
"learning_rate": 8.45594883402544e-06,
"loss": 0.6179,
"step": 4410
},
{
"epoch": 0.28,
"grad_norm": 0.9319069385528564,
"learning_rate": 8.455207303026378e-06,
"loss": 0.6356,
"step": 4411
},
{
"epoch": 0.28,
"grad_norm": 0.8791349530220032,
"learning_rate": 8.454465626540555e-06,
"loss": 0.5906,
"step": 4412
},
{
"epoch": 0.28,
"grad_norm": 0.9056016802787781,
"learning_rate": 8.453723804599203e-06,
"loss": 0.6095,
"step": 4413
},
{
"epoch": 0.28,
"grad_norm": 0.9093009233474731,
"learning_rate": 8.452981837233555e-06,
"loss": 0.6442,
"step": 4414
},
{
"epoch": 0.28,
"grad_norm": 0.9653396010398865,
"learning_rate": 8.452239724474856e-06,
"loss": 0.6397,
"step": 4415
},
{
"epoch": 0.28,
"grad_norm": 0.9115119576454163,
"learning_rate": 8.451497466354349e-06,
"loss": 0.5723,
"step": 4416
},
{
"epoch": 0.28,
"grad_norm": 0.9298482537269592,
"learning_rate": 8.450755062903293e-06,
"loss": 0.6244,
"step": 4417
},
{
"epoch": 0.28,
"grad_norm": 0.8901708126068115,
"learning_rate": 8.450012514152943e-06,
"loss": 0.6238,
"step": 4418
},
{
"epoch": 0.28,
"grad_norm": 0.8972589373588562,
"learning_rate": 8.44926982013457e-06,
"loss": 0.6162,
"step": 4419
},
{
"epoch": 0.28,
"grad_norm": 0.8598697185516357,
"learning_rate": 8.448526980879444e-06,
"loss": 0.5909,
"step": 4420
},
{
"epoch": 0.28,
"grad_norm": 1.0167523622512817,
"learning_rate": 8.447783996418843e-06,
"loss": 0.6784,
"step": 4421
},
{
"epoch": 0.28,
"grad_norm": 0.8606759905815125,
"learning_rate": 8.447040866784051e-06,
"loss": 0.5985,
"step": 4422
},
{
"epoch": 0.28,
"grad_norm": 0.9100238084793091,
"learning_rate": 8.446297592006361e-06,
"loss": 0.5486,
"step": 4423
},
{
"epoch": 0.28,
"grad_norm": 0.864998996257782,
"learning_rate": 8.445554172117066e-06,
"loss": 0.6308,
"step": 4424
},
{
"epoch": 0.28,
"grad_norm": 0.8984532356262207,
"learning_rate": 8.444810607147472e-06,
"loss": 0.5894,
"step": 4425
},
{
"epoch": 0.28,
"grad_norm": 0.8566537499427795,
"learning_rate": 8.444066897128888e-06,
"loss": 0.5764,
"step": 4426
},
{
"epoch": 0.28,
"grad_norm": 0.8784050941467285,
"learning_rate": 8.443323042092625e-06,
"loss": 0.5923,
"step": 4427
},
{
"epoch": 0.28,
"grad_norm": 0.9064181447029114,
"learning_rate": 8.442579042070011e-06,
"loss": 0.6279,
"step": 4428
},
{
"epoch": 0.28,
"grad_norm": 0.8186553120613098,
"learning_rate": 8.441834897092366e-06,
"loss": 0.6041,
"step": 4429
},
{
"epoch": 0.28,
"grad_norm": 0.9280451536178589,
"learning_rate": 8.44109060719103e-06,
"loss": 0.5901,
"step": 4430
},
{
"epoch": 0.28,
"grad_norm": 0.9555798172950745,
"learning_rate": 8.440346172397338e-06,
"loss": 0.672,
"step": 4431
},
{
"epoch": 0.28,
"grad_norm": 0.8926699161529541,
"learning_rate": 8.439601592742637e-06,
"loss": 0.6645,
"step": 4432
},
{
"epoch": 0.28,
"grad_norm": 0.8857988119125366,
"learning_rate": 8.438856868258278e-06,
"loss": 0.6439,
"step": 4433
},
{
"epoch": 0.28,
"grad_norm": 0.8523682951927185,
"learning_rate": 8.438111998975618e-06,
"loss": 0.6044,
"step": 4434
},
{
"epoch": 0.28,
"grad_norm": 0.8690520524978638,
"learning_rate": 8.437366984926023e-06,
"loss": 0.618,
"step": 4435
},
{
"epoch": 0.28,
"grad_norm": 0.8861067891120911,
"learning_rate": 8.436621826140863e-06,
"loss": 0.617,
"step": 4436
},
{
"epoch": 0.28,
"grad_norm": 0.8998048901557922,
"learning_rate": 8.435876522651512e-06,
"loss": 0.6881,
"step": 4437
},
{
"epoch": 0.28,
"grad_norm": 1.0284022092819214,
"learning_rate": 8.435131074489353e-06,
"loss": 0.6871,
"step": 4438
},
{
"epoch": 0.28,
"grad_norm": 0.8755271434783936,
"learning_rate": 8.434385481685776e-06,
"loss": 0.5637,
"step": 4439
},
{
"epoch": 0.28,
"grad_norm": 0.9131196737289429,
"learning_rate": 8.43363974427217e-06,
"loss": 0.6516,
"step": 4440
},
{
"epoch": 0.28,
"grad_norm": 0.8995763063430786,
"learning_rate": 8.432893862279943e-06,
"loss": 0.5847,
"step": 4441
},
{
"epoch": 0.28,
"grad_norm": 0.923299252986908,
"learning_rate": 8.432147835740496e-06,
"loss": 0.6213,
"step": 4442
},
{
"epoch": 0.28,
"grad_norm": 0.9042030572891235,
"learning_rate": 8.431401664685244e-06,
"loss": 0.6172,
"step": 4443
},
{
"epoch": 0.28,
"grad_norm": 0.963955283164978,
"learning_rate": 8.430655349145604e-06,
"loss": 0.6221,
"step": 4444
},
{
"epoch": 0.28,
"grad_norm": 0.9096510410308838,
"learning_rate": 8.429908889153003e-06,
"loss": 0.6646,
"step": 4445
},
{
"epoch": 0.28,
"grad_norm": 0.8882843852043152,
"learning_rate": 8.429162284738868e-06,
"loss": 0.6382,
"step": 4446
},
{
"epoch": 0.28,
"grad_norm": 0.8437566757202148,
"learning_rate": 8.42841553593464e-06,
"loss": 0.6169,
"step": 4447
},
{
"epoch": 0.28,
"grad_norm": 0.8963313102722168,
"learning_rate": 8.42766864277176e-06,
"loss": 0.6054,
"step": 4448
},
{
"epoch": 0.28,
"grad_norm": 0.8515428900718689,
"learning_rate": 8.426921605281677e-06,
"loss": 0.6261,
"step": 4449
},
{
"epoch": 0.28,
"grad_norm": 0.9076332449913025,
"learning_rate": 8.426174423495848e-06,
"loss": 0.6133,
"step": 4450
},
{
"epoch": 0.28,
"grad_norm": 0.9798647165298462,
"learning_rate": 8.425427097445733e-06,
"loss": 0.6373,
"step": 4451
},
{
"epoch": 0.28,
"grad_norm": 0.8840082883834839,
"learning_rate": 8.424679627162798e-06,
"loss": 0.65,
"step": 4452
},
{
"epoch": 0.28,
"grad_norm": 0.8393424153327942,
"learning_rate": 8.423932012678516e-06,
"loss": 0.5844,
"step": 4453
},
{
"epoch": 0.28,
"grad_norm": 0.9224118590354919,
"learning_rate": 8.42318425402437e-06,
"loss": 0.6875,
"step": 4454
},
{
"epoch": 0.28,
"grad_norm": 0.8217747211456299,
"learning_rate": 8.422436351231843e-06,
"loss": 0.5858,
"step": 4455
},
{
"epoch": 0.28,
"grad_norm": 0.8549429774284363,
"learning_rate": 8.421688304332428e-06,
"loss": 0.5739,
"step": 4456
},
{
"epoch": 0.28,
"grad_norm": 0.88507080078125,
"learning_rate": 8.42094011335762e-06,
"loss": 0.5718,
"step": 4457
},
{
"epoch": 0.28,
"grad_norm": 0.9432583451271057,
"learning_rate": 8.420191778338924e-06,
"loss": 0.5703,
"step": 4458
},
{
"epoch": 0.28,
"grad_norm": 0.893008291721344,
"learning_rate": 8.419443299307852e-06,
"loss": 0.6452,
"step": 4459
},
{
"epoch": 0.28,
"grad_norm": 0.8943834900856018,
"learning_rate": 8.418694676295918e-06,
"loss": 0.5895,
"step": 4460
},
{
"epoch": 0.28,
"grad_norm": 0.8623561859130859,
"learning_rate": 8.417945909334642e-06,
"loss": 0.6079,
"step": 4461
},
{
"epoch": 0.28,
"grad_norm": 0.8554010987281799,
"learning_rate": 8.417196998455555e-06,
"loss": 0.6034,
"step": 4462
},
{
"epoch": 0.28,
"grad_norm": 0.8964874148368835,
"learning_rate": 8.41644794369019e-06,
"loss": 0.5709,
"step": 4463
},
{
"epoch": 0.28,
"grad_norm": 0.8765043616294861,
"learning_rate": 8.415698745070088e-06,
"loss": 0.5924,
"step": 4464
},
{
"epoch": 0.28,
"grad_norm": 0.9031361937522888,
"learning_rate": 8.414949402626793e-06,
"loss": 0.644,
"step": 4465
},
{
"epoch": 0.28,
"grad_norm": 0.8381129503250122,
"learning_rate": 8.41419991639186e-06,
"loss": 0.5794,
"step": 4466
},
{
"epoch": 0.28,
"grad_norm": 0.9276309013366699,
"learning_rate": 8.413450286396845e-06,
"loss": 0.5939,
"step": 4467
},
{
"epoch": 0.28,
"grad_norm": 0.821047306060791,
"learning_rate": 8.41270051267331e-06,
"loss": 0.5748,
"step": 4468
},
{
"epoch": 0.28,
"grad_norm": 0.8938078880310059,
"learning_rate": 8.411950595252834e-06,
"loss": 0.613,
"step": 4469
},
{
"epoch": 0.28,
"grad_norm": 0.9239148497581482,
"learning_rate": 8.411200534166983e-06,
"loss": 0.6725,
"step": 4470
},
{
"epoch": 0.28,
"grad_norm": 0.8708427548408508,
"learning_rate": 8.410450329447346e-06,
"loss": 0.6503,
"step": 4471
},
{
"epoch": 0.28,
"grad_norm": 0.9489243626594543,
"learning_rate": 8.409699981125509e-06,
"loss": 0.6561,
"step": 4472
},
{
"epoch": 0.28,
"grad_norm": 0.9252210259437561,
"learning_rate": 8.408949489233068e-06,
"loss": 0.6548,
"step": 4473
},
{
"epoch": 0.28,
"grad_norm": 0.8737644553184509,
"learning_rate": 8.408198853801623e-06,
"loss": 0.5992,
"step": 4474
},
{
"epoch": 0.28,
"grad_norm": 0.9438381195068359,
"learning_rate": 8.40744807486278e-06,
"loss": 0.6524,
"step": 4475
},
{
"epoch": 0.28,
"grad_norm": 0.8789763450622559,
"learning_rate": 8.406697152448152e-06,
"loss": 0.6056,
"step": 4476
},
{
"epoch": 0.28,
"grad_norm": 0.9246413707733154,
"learning_rate": 8.405946086589359e-06,
"loss": 0.6097,
"step": 4477
},
{
"epoch": 0.28,
"grad_norm": 0.9346416592597961,
"learning_rate": 8.405194877318023e-06,
"loss": 0.6877,
"step": 4478
},
{
"epoch": 0.28,
"grad_norm": 0.8847804069519043,
"learning_rate": 8.404443524665777e-06,
"loss": 0.6492,
"step": 4479
},
{
"epoch": 0.28,
"grad_norm": 0.8874092698097229,
"learning_rate": 8.40369202866426e-06,
"loss": 0.5755,
"step": 4480
},
{
"epoch": 0.28,
"grad_norm": 0.9419736266136169,
"learning_rate": 8.40294038934511e-06,
"loss": 0.6488,
"step": 4481
},
{
"epoch": 0.28,
"grad_norm": 0.8550480604171753,
"learning_rate": 8.402188606739977e-06,
"loss": 0.5936,
"step": 4482
},
{
"epoch": 0.28,
"grad_norm": 0.9512335062026978,
"learning_rate": 8.401436680880518e-06,
"loss": 0.6031,
"step": 4483
},
{
"epoch": 0.28,
"grad_norm": 0.8816537857055664,
"learning_rate": 8.400684611798395e-06,
"loss": 0.5836,
"step": 4484
},
{
"epoch": 0.28,
"grad_norm": 0.8356591463088989,
"learning_rate": 8.39993239952527e-06,
"loss": 0.5666,
"step": 4485
},
{
"epoch": 0.28,
"grad_norm": 0.9029728770256042,
"learning_rate": 8.399180044092821e-06,
"loss": 0.5819,
"step": 4486
},
{
"epoch": 0.28,
"grad_norm": 0.9611971378326416,
"learning_rate": 8.398427545532726e-06,
"loss": 0.589,
"step": 4487
},
{
"epoch": 0.28,
"grad_norm": 0.9153091907501221,
"learning_rate": 8.397674903876667e-06,
"loss": 0.6034,
"step": 4488
},
{
"epoch": 0.28,
"grad_norm": 0.9280160665512085,
"learning_rate": 8.396922119156339e-06,
"loss": 0.6534,
"step": 4489
},
{
"epoch": 0.28,
"grad_norm": 0.9492883086204529,
"learning_rate": 8.396169191403438e-06,
"loss": 0.6022,
"step": 4490
},
{
"epoch": 0.28,
"grad_norm": 0.9128872156143188,
"learning_rate": 8.395416120649667e-06,
"loss": 0.5807,
"step": 4491
},
{
"epoch": 0.28,
"grad_norm": 0.8702619075775146,
"learning_rate": 8.394662906926734e-06,
"loss": 0.5689,
"step": 4492
},
{
"epoch": 0.28,
"grad_norm": 0.8514307141304016,
"learning_rate": 8.393909550266354e-06,
"loss": 0.6161,
"step": 4493
},
{
"epoch": 0.28,
"grad_norm": 0.8472135663032532,
"learning_rate": 8.393156050700252e-06,
"loss": 0.626,
"step": 4494
},
{
"epoch": 0.28,
"grad_norm": 0.8964636921882629,
"learning_rate": 8.39240240826015e-06,
"loss": 0.6282,
"step": 4495
},
{
"epoch": 0.28,
"grad_norm": 0.9277433753013611,
"learning_rate": 8.391648622977787e-06,
"loss": 0.6707,
"step": 4496
},
{
"epoch": 0.28,
"grad_norm": 0.8764444589614868,
"learning_rate": 8.390894694884896e-06,
"loss": 0.5962,
"step": 4497
},
{
"epoch": 0.28,
"grad_norm": 0.9093109965324402,
"learning_rate": 8.390140624013228e-06,
"loss": 0.6039,
"step": 4498
},
{
"epoch": 0.29,
"grad_norm": 0.9418292045593262,
"learning_rate": 8.38938641039453e-06,
"loss": 0.6415,
"step": 4499
},
{
"epoch": 0.29,
"grad_norm": 0.8592790961265564,
"learning_rate": 8.388632054060562e-06,
"loss": 0.5807,
"step": 4500
},
{
"epoch": 0.29,
"grad_norm": 0.9306639432907104,
"learning_rate": 8.387877555043086e-06,
"loss": 0.6477,
"step": 4501
},
{
"epoch": 0.29,
"grad_norm": 0.8802691698074341,
"learning_rate": 8.38712291337387e-06,
"loss": 0.6043,
"step": 4502
},
{
"epoch": 0.29,
"grad_norm": 0.8935637474060059,
"learning_rate": 8.386368129084695e-06,
"loss": 0.5958,
"step": 4503
},
{
"epoch": 0.29,
"grad_norm": 0.9453160166740417,
"learning_rate": 8.385613202207336e-06,
"loss": 0.6246,
"step": 4504
},
{
"epoch": 0.29,
"grad_norm": 0.8884761929512024,
"learning_rate": 8.384858132773582e-06,
"loss": 0.6038,
"step": 4505
},
{
"epoch": 0.29,
"grad_norm": 0.8889615535736084,
"learning_rate": 8.38410292081523e-06,
"loss": 0.563,
"step": 4506
},
{
"epoch": 0.29,
"grad_norm": 0.8603700995445251,
"learning_rate": 8.383347566364072e-06,
"loss": 0.5701,
"step": 4507
},
{
"epoch": 0.29,
"grad_norm": 0.8758067488670349,
"learning_rate": 8.38259206945192e-06,
"loss": 0.6018,
"step": 4508
},
{
"epoch": 0.29,
"grad_norm": 0.9259410500526428,
"learning_rate": 8.381836430110585e-06,
"loss": 0.5987,
"step": 4509
},
{
"epoch": 0.29,
"grad_norm": 0.913033127784729,
"learning_rate": 8.38108064837188e-06,
"loss": 0.6485,
"step": 4510
},
{
"epoch": 0.29,
"grad_norm": 0.88724285364151,
"learning_rate": 8.380324724267631e-06,
"loss": 0.6141,
"step": 4511
},
{
"epoch": 0.29,
"grad_norm": 0.8932639360427856,
"learning_rate": 8.379568657829669e-06,
"loss": 0.6432,
"step": 4512
},
{
"epoch": 0.29,
"grad_norm": 0.8454581499099731,
"learning_rate": 8.378812449089826e-06,
"loss": 0.6136,
"step": 4513
},
{
"epoch": 0.29,
"grad_norm": 0.9295586943626404,
"learning_rate": 8.378056098079946e-06,
"loss": 0.6594,
"step": 4514
},
{
"epoch": 0.29,
"grad_norm": 0.9568715691566467,
"learning_rate": 8.377299604831875e-06,
"loss": 0.664,
"step": 4515
},
{
"epoch": 0.29,
"grad_norm": 0.8817077875137329,
"learning_rate": 8.376542969377465e-06,
"loss": 0.6482,
"step": 4516
},
{
"epoch": 0.29,
"grad_norm": 0.924589216709137,
"learning_rate": 8.375786191748578e-06,
"loss": 0.6711,
"step": 4517
},
{
"epoch": 0.29,
"grad_norm": 0.9990555047988892,
"learning_rate": 8.375029271977076e-06,
"loss": 0.6757,
"step": 4518
},
{
"epoch": 0.29,
"grad_norm": 0.8449356555938721,
"learning_rate": 8.374272210094834e-06,
"loss": 0.6219,
"step": 4519
},
{
"epoch": 0.29,
"grad_norm": 0.8764523863792419,
"learning_rate": 8.373515006133728e-06,
"loss": 0.5765,
"step": 4520
},
{
"epoch": 0.29,
"grad_norm": 0.9035282731056213,
"learning_rate": 8.372757660125639e-06,
"loss": 0.5511,
"step": 4521
},
{
"epoch": 0.29,
"grad_norm": 0.9758896231651306,
"learning_rate": 8.372000172102459e-06,
"loss": 0.654,
"step": 4522
},
{
"epoch": 0.29,
"grad_norm": 0.9765152931213379,
"learning_rate": 8.37124254209608e-06,
"loss": 0.6125,
"step": 4523
},
{
"epoch": 0.29,
"grad_norm": 0.8869422674179077,
"learning_rate": 8.370484770138407e-06,
"loss": 0.6502,
"step": 4524
},
{
"epoch": 0.29,
"grad_norm": 0.9507737755775452,
"learning_rate": 8.369726856261346e-06,
"loss": 0.6427,
"step": 4525
},
{
"epoch": 0.29,
"grad_norm": 0.9340800642967224,
"learning_rate": 8.36896880049681e-06,
"loss": 0.576,
"step": 4526
},
{
"epoch": 0.29,
"grad_norm": 0.9077014327049255,
"learning_rate": 8.368210602876716e-06,
"loss": 0.5923,
"step": 4527
},
{
"epoch": 0.29,
"grad_norm": 0.8733184933662415,
"learning_rate": 8.36745226343299e-06,
"loss": 0.6116,
"step": 4528
},
{
"epoch": 0.29,
"grad_norm": 0.9413378834724426,
"learning_rate": 8.366693782197566e-06,
"loss": 0.6095,
"step": 4529
},
{
"epoch": 0.29,
"grad_norm": 0.9507108330726624,
"learning_rate": 8.365935159202378e-06,
"loss": 0.6222,
"step": 4530
},
{
"epoch": 0.29,
"grad_norm": 0.950071394443512,
"learning_rate": 8.365176394479368e-06,
"loss": 0.6427,
"step": 4531
},
{
"epoch": 0.29,
"grad_norm": 0.8926099538803101,
"learning_rate": 8.364417488060488e-06,
"loss": 0.6253,
"step": 4532
},
{
"epoch": 0.29,
"grad_norm": 0.8760389089584351,
"learning_rate": 8.363658439977693e-06,
"loss": 0.5829,
"step": 4533
},
{
"epoch": 0.29,
"grad_norm": 0.8548893332481384,
"learning_rate": 8.36289925026294e-06,
"loss": 0.5992,
"step": 4534
},
{
"epoch": 0.29,
"grad_norm": 0.9305916428565979,
"learning_rate": 8.362139918948198e-06,
"loss": 0.6084,
"step": 4535
},
{
"epoch": 0.29,
"grad_norm": 0.9650013446807861,
"learning_rate": 8.36138044606544e-06,
"loss": 0.6429,
"step": 4536
},
{
"epoch": 0.29,
"grad_norm": 0.8791600465774536,
"learning_rate": 8.360620831646647e-06,
"loss": 0.6104,
"step": 4537
},
{
"epoch": 0.29,
"grad_norm": 0.8988505601882935,
"learning_rate": 8.359861075723801e-06,
"loss": 0.6117,
"step": 4538
},
{
"epoch": 0.29,
"grad_norm": 0.9081864953041077,
"learning_rate": 8.359101178328893e-06,
"loss": 0.6432,
"step": 4539
},
{
"epoch": 0.29,
"grad_norm": 0.8397430181503296,
"learning_rate": 8.358341139493919e-06,
"loss": 0.5822,
"step": 4540
},
{
"epoch": 0.29,
"grad_norm": 0.9269049167633057,
"learning_rate": 8.357580959250882e-06,
"loss": 0.5718,
"step": 4541
},
{
"epoch": 0.29,
"grad_norm": 0.9173187017440796,
"learning_rate": 8.356820637631792e-06,
"loss": 0.6343,
"step": 4542
},
{
"epoch": 0.29,
"grad_norm": 0.9146298766136169,
"learning_rate": 8.356060174668663e-06,
"loss": 0.5987,
"step": 4543
},
{
"epoch": 0.29,
"grad_norm": 0.8490142226219177,
"learning_rate": 8.355299570393515e-06,
"loss": 0.5537,
"step": 4544
},
{
"epoch": 0.29,
"grad_norm": 0.9867364764213562,
"learning_rate": 8.354538824838373e-06,
"loss": 0.6229,
"step": 4545
},
{
"epoch": 0.29,
"grad_norm": 0.8394815921783447,
"learning_rate": 8.353777938035272e-06,
"loss": 0.5803,
"step": 4546
},
{
"epoch": 0.29,
"grad_norm": 0.9035863280296326,
"learning_rate": 8.353016910016247e-06,
"loss": 0.6028,
"step": 4547
},
{
"epoch": 0.29,
"grad_norm": 0.9454771876335144,
"learning_rate": 8.352255740813347e-06,
"loss": 0.6281,
"step": 4548
},
{
"epoch": 0.29,
"grad_norm": 0.8816177845001221,
"learning_rate": 8.351494430458617e-06,
"loss": 0.5853,
"step": 4549
},
{
"epoch": 0.29,
"grad_norm": 0.9621097445487976,
"learning_rate": 8.350732978984116e-06,
"loss": 0.64,
"step": 4550
},
{
"epoch": 0.29,
"grad_norm": 0.8992953896522522,
"learning_rate": 8.349971386421906e-06,
"loss": 0.609,
"step": 4551
},
{
"epoch": 0.29,
"grad_norm": 0.8685299754142761,
"learning_rate": 8.349209652804055e-06,
"loss": 0.5633,
"step": 4552
},
{
"epoch": 0.29,
"grad_norm": 0.8441104292869568,
"learning_rate": 8.348447778162636e-06,
"loss": 0.6342,
"step": 4553
},
{
"epoch": 0.29,
"grad_norm": 0.8986367583274841,
"learning_rate": 8.347685762529729e-06,
"loss": 0.6462,
"step": 4554
},
{
"epoch": 0.29,
"grad_norm": 0.8934696316719055,
"learning_rate": 8.34692360593742e-06,
"loss": 0.5742,
"step": 4555
},
{
"epoch": 0.29,
"grad_norm": 0.9160881042480469,
"learning_rate": 8.346161308417805e-06,
"loss": 0.6352,
"step": 4556
},
{
"epoch": 0.29,
"grad_norm": 0.8555467128753662,
"learning_rate": 8.345398870002972e-06,
"loss": 0.6251,
"step": 4557
},
{
"epoch": 0.29,
"grad_norm": 0.9095616340637207,
"learning_rate": 8.344636290725035e-06,
"loss": 0.6466,
"step": 4558
},
{
"epoch": 0.29,
"grad_norm": 0.8506302237510681,
"learning_rate": 8.343873570616097e-06,
"loss": 0.6207,
"step": 4559
},
{
"epoch": 0.29,
"grad_norm": 0.8929101228713989,
"learning_rate": 8.343110709708275e-06,
"loss": 0.6406,
"step": 4560
},
{
"epoch": 0.29,
"grad_norm": 0.919562816619873,
"learning_rate": 8.342347708033692e-06,
"loss": 0.6297,
"step": 4561
},
{
"epoch": 0.29,
"grad_norm": 0.9147757291793823,
"learning_rate": 8.341584565624471e-06,
"loss": 0.6507,
"step": 4562
},
{
"epoch": 0.29,
"grad_norm": 0.8911783695220947,
"learning_rate": 8.340821282512753e-06,
"loss": 0.6204,
"step": 4563
},
{
"epoch": 0.29,
"grad_norm": 0.9014183878898621,
"learning_rate": 8.34005785873067e-06,
"loss": 0.5942,
"step": 4564
},
{
"epoch": 0.29,
"grad_norm": 0.9250972270965576,
"learning_rate": 8.339294294310371e-06,
"loss": 0.6221,
"step": 4565
},
{
"epoch": 0.29,
"grad_norm": 0.8855701684951782,
"learning_rate": 8.338530589284005e-06,
"loss": 0.6531,
"step": 4566
},
{
"epoch": 0.29,
"grad_norm": 0.8788840174674988,
"learning_rate": 8.33776674368373e-06,
"loss": 0.5342,
"step": 4567
},
{
"epoch": 0.29,
"grad_norm": 0.9645684361457825,
"learning_rate": 8.337002757541708e-06,
"loss": 0.6643,
"step": 4568
},
{
"epoch": 0.29,
"grad_norm": 0.8790433406829834,
"learning_rate": 8.33623863089011e-06,
"loss": 0.5891,
"step": 4569
},
{
"epoch": 0.29,
"grad_norm": 0.8718952536582947,
"learning_rate": 8.335474363761109e-06,
"loss": 0.5621,
"step": 4570
},
{
"epoch": 0.29,
"grad_norm": 0.9348157048225403,
"learning_rate": 8.334709956186884e-06,
"loss": 0.6079,
"step": 4571
},
{
"epoch": 0.29,
"grad_norm": 0.8799747824668884,
"learning_rate": 8.333945408199624e-06,
"loss": 0.5964,
"step": 4572
},
{
"epoch": 0.29,
"grad_norm": 0.8926383852958679,
"learning_rate": 8.333180719831521e-06,
"loss": 0.6197,
"step": 4573
},
{
"epoch": 0.29,
"grad_norm": 0.9374673962593079,
"learning_rate": 8.332415891114774e-06,
"loss": 0.6275,
"step": 4574
},
{
"epoch": 0.29,
"grad_norm": 0.9162465333938599,
"learning_rate": 8.331650922081586e-06,
"loss": 0.5774,
"step": 4575
},
{
"epoch": 0.29,
"grad_norm": 0.8876767158508301,
"learning_rate": 8.330885812764168e-06,
"loss": 0.6274,
"step": 4576
},
{
"epoch": 0.29,
"grad_norm": 0.8842494487762451,
"learning_rate": 8.330120563194736e-06,
"loss": 0.6357,
"step": 4577
},
{
"epoch": 0.29,
"grad_norm": 0.8948314189910889,
"learning_rate": 8.32935517340551e-06,
"loss": 0.6393,
"step": 4578
},
{
"epoch": 0.29,
"grad_norm": 0.8375378251075745,
"learning_rate": 8.328589643428722e-06,
"loss": 0.5923,
"step": 4579
},
{
"epoch": 0.29,
"grad_norm": 0.904406726360321,
"learning_rate": 8.327823973296601e-06,
"loss": 0.6558,
"step": 4580
},
{
"epoch": 0.29,
"grad_norm": 0.9058637619018555,
"learning_rate": 8.32705816304139e-06,
"loss": 0.6308,
"step": 4581
},
{
"epoch": 0.29,
"grad_norm": 0.8600705862045288,
"learning_rate": 8.326292212695335e-06,
"loss": 0.663,
"step": 4582
},
{
"epoch": 0.29,
"grad_norm": 0.9292261004447937,
"learning_rate": 8.325526122290685e-06,
"loss": 0.6219,
"step": 4583
},
{
"epoch": 0.29,
"grad_norm": 0.8440708518028259,
"learning_rate": 8.3247598918597e-06,
"loss": 0.5731,
"step": 4584
},
{
"epoch": 0.29,
"grad_norm": 0.9342061281204224,
"learning_rate": 8.323993521434639e-06,
"loss": 0.6052,
"step": 4585
},
{
"epoch": 0.29,
"grad_norm": 0.9192177057266235,
"learning_rate": 8.323227011047777e-06,
"loss": 0.5993,
"step": 4586
},
{
"epoch": 0.29,
"grad_norm": 0.8514859676361084,
"learning_rate": 8.322460360731386e-06,
"loss": 0.6584,
"step": 4587
},
{
"epoch": 0.29,
"grad_norm": 0.8630070686340332,
"learning_rate": 8.321693570517745e-06,
"loss": 0.6247,
"step": 4588
},
{
"epoch": 0.29,
"grad_norm": 0.9018881320953369,
"learning_rate": 8.320926640439145e-06,
"loss": 0.5849,
"step": 4589
},
{
"epoch": 0.29,
"grad_norm": 0.8964559435844421,
"learning_rate": 8.320159570527876e-06,
"loss": 0.6018,
"step": 4590
},
{
"epoch": 0.29,
"grad_norm": 0.8430085182189941,
"learning_rate": 8.319392360816239e-06,
"loss": 0.6033,
"step": 4591
},
{
"epoch": 0.29,
"grad_norm": 0.8777481913566589,
"learning_rate": 8.318625011336533e-06,
"loss": 0.6466,
"step": 4592
},
{
"epoch": 0.29,
"grad_norm": 0.9354075789451599,
"learning_rate": 8.317857522121078e-06,
"loss": 0.6187,
"step": 4593
},
{
"epoch": 0.29,
"grad_norm": 0.9195157289505005,
"learning_rate": 8.317089893202181e-06,
"loss": 0.6094,
"step": 4594
},
{
"epoch": 0.29,
"grad_norm": 0.9029771089553833,
"learning_rate": 8.316322124612169e-06,
"loss": 0.6705,
"step": 4595
},
{
"epoch": 0.29,
"grad_norm": 0.9089044332504272,
"learning_rate": 8.315554216383368e-06,
"loss": 0.657,
"step": 4596
},
{
"epoch": 0.29,
"grad_norm": 0.9705564975738525,
"learning_rate": 8.314786168548115e-06,
"loss": 0.645,
"step": 4597
},
{
"epoch": 0.29,
"grad_norm": 0.8919417858123779,
"learning_rate": 8.314017981138746e-06,
"loss": 0.5922,
"step": 4598
},
{
"epoch": 0.29,
"grad_norm": 0.9287596344947815,
"learning_rate": 8.31324965418761e-06,
"loss": 0.629,
"step": 4599
},
{
"epoch": 0.29,
"grad_norm": 0.8940380811691284,
"learning_rate": 8.312481187727055e-06,
"loss": 0.6292,
"step": 4600
},
{
"epoch": 0.29,
"grad_norm": 0.8414455652236938,
"learning_rate": 8.311712581789442e-06,
"loss": 0.5379,
"step": 4601
},
{
"epoch": 0.29,
"grad_norm": 0.8981665372848511,
"learning_rate": 8.310943836407132e-06,
"loss": 0.6239,
"step": 4602
},
{
"epoch": 0.29,
"grad_norm": 0.9226404428482056,
"learning_rate": 8.310174951612495e-06,
"loss": 0.5864,
"step": 4603
},
{
"epoch": 0.29,
"grad_norm": 0.8723615407943726,
"learning_rate": 8.309405927437906e-06,
"loss": 0.5485,
"step": 4604
},
{
"epoch": 0.29,
"grad_norm": 0.8954591751098633,
"learning_rate": 8.308636763915746e-06,
"loss": 0.6198,
"step": 4605
},
{
"epoch": 0.29,
"grad_norm": 0.8918243050575256,
"learning_rate": 8.307867461078402e-06,
"loss": 0.6386,
"step": 4606
},
{
"epoch": 0.29,
"grad_norm": 0.9272078275680542,
"learning_rate": 8.307098018958266e-06,
"loss": 0.6456,
"step": 4607
},
{
"epoch": 0.29,
"grad_norm": 0.8392652869224548,
"learning_rate": 8.306328437587738e-06,
"loss": 0.6253,
"step": 4608
},
{
"epoch": 0.29,
"grad_norm": 0.8458937406539917,
"learning_rate": 8.305558716999221e-06,
"loss": 0.619,
"step": 4609
},
{
"epoch": 0.29,
"grad_norm": 0.9669510126113892,
"learning_rate": 8.304788857225126e-06,
"loss": 0.6159,
"step": 4610
},
{
"epoch": 0.29,
"grad_norm": 0.8691350221633911,
"learning_rate": 8.304018858297867e-06,
"loss": 0.5951,
"step": 4611
},
{
"epoch": 0.29,
"grad_norm": 0.9048541784286499,
"learning_rate": 8.30324872024987e-06,
"loss": 0.6122,
"step": 4612
},
{
"epoch": 0.29,
"grad_norm": 0.897702693939209,
"learning_rate": 8.30247844311356e-06,
"loss": 0.5482,
"step": 4613
},
{
"epoch": 0.29,
"grad_norm": 0.967581570148468,
"learning_rate": 8.301708026921371e-06,
"loss": 0.6607,
"step": 4614
},
{
"epoch": 0.29,
"grad_norm": 0.9215171933174133,
"learning_rate": 8.300937471705742e-06,
"loss": 0.6724,
"step": 4615
},
{
"epoch": 0.29,
"grad_norm": 0.8553723692893982,
"learning_rate": 8.300166777499119e-06,
"loss": 0.6192,
"step": 4616
},
{
"epoch": 0.29,
"grad_norm": 0.9200363755226135,
"learning_rate": 8.299395944333955e-06,
"loss": 0.6755,
"step": 4617
},
{
"epoch": 0.29,
"grad_norm": 0.883851170539856,
"learning_rate": 8.298624972242704e-06,
"loss": 0.5919,
"step": 4618
},
{
"epoch": 0.29,
"grad_norm": 0.9365254044532776,
"learning_rate": 8.297853861257831e-06,
"loss": 0.5648,
"step": 4619
},
{
"epoch": 0.29,
"grad_norm": 0.9034328460693359,
"learning_rate": 8.297082611411805e-06,
"loss": 0.6108,
"step": 4620
},
{
"epoch": 0.29,
"grad_norm": 0.8502330780029297,
"learning_rate": 8.296311222737099e-06,
"loss": 0.5599,
"step": 4621
},
{
"epoch": 0.29,
"grad_norm": 0.905636727809906,
"learning_rate": 8.295539695266195e-06,
"loss": 0.5831,
"step": 4622
},
{
"epoch": 0.29,
"grad_norm": 0.8426777124404907,
"learning_rate": 8.29476802903158e-06,
"loss": 0.5742,
"step": 4623
},
{
"epoch": 0.29,
"grad_norm": 0.8838980793952942,
"learning_rate": 8.293996224065742e-06,
"loss": 0.6093,
"step": 4624
},
{
"epoch": 0.29,
"grad_norm": 0.866264283657074,
"learning_rate": 8.293224280401185e-06,
"loss": 0.6257,
"step": 4625
},
{
"epoch": 0.29,
"grad_norm": 0.9148405194282532,
"learning_rate": 8.292452198070406e-06,
"loss": 0.6198,
"step": 4626
},
{
"epoch": 0.29,
"grad_norm": 0.8394678831100464,
"learning_rate": 8.291679977105922e-06,
"loss": 0.5995,
"step": 4627
},
{
"epoch": 0.29,
"grad_norm": 0.858686089515686,
"learning_rate": 8.290907617540244e-06,
"loss": 0.5912,
"step": 4628
},
{
"epoch": 0.29,
"grad_norm": 0.9442601203918457,
"learning_rate": 8.290135119405894e-06,
"loss": 0.5746,
"step": 4629
},
{
"epoch": 0.29,
"grad_norm": 0.8091254234313965,
"learning_rate": 8.2893624827354e-06,
"loss": 0.5738,
"step": 4630
},
{
"epoch": 0.29,
"grad_norm": 0.8589221835136414,
"learning_rate": 8.288589707561295e-06,
"loss": 0.5908,
"step": 4631
},
{
"epoch": 0.29,
"grad_norm": 0.8597394824028015,
"learning_rate": 8.287816793916119e-06,
"loss": 0.6029,
"step": 4632
},
{
"epoch": 0.29,
"grad_norm": 0.9113194942474365,
"learning_rate": 8.287043741832412e-06,
"loss": 0.6494,
"step": 4633
},
{
"epoch": 0.29,
"grad_norm": 0.8626760244369507,
"learning_rate": 8.28627055134273e-06,
"loss": 0.5452,
"step": 4634
},
{
"epoch": 0.29,
"grad_norm": 0.8131372332572937,
"learning_rate": 8.285497222479626e-06,
"loss": 0.5791,
"step": 4635
},
{
"epoch": 0.29,
"grad_norm": 0.8600938320159912,
"learning_rate": 8.284723755275666e-06,
"loss": 0.6142,
"step": 4636
},
{
"epoch": 0.29,
"grad_norm": 0.8754161596298218,
"learning_rate": 8.283950149763413e-06,
"loss": 0.571,
"step": 4637
},
{
"epoch": 0.29,
"grad_norm": 0.8484256863594055,
"learning_rate": 8.283176405975444e-06,
"loss": 0.5876,
"step": 4638
},
{
"epoch": 0.29,
"grad_norm": 0.8761142492294312,
"learning_rate": 8.282402523944338e-06,
"loss": 0.5813,
"step": 4639
},
{
"epoch": 0.29,
"grad_norm": 0.8704332709312439,
"learning_rate": 8.28162850370268e-06,
"loss": 0.5611,
"step": 4640
},
{
"epoch": 0.29,
"grad_norm": 0.9601176977157593,
"learning_rate": 8.28085434528306e-06,
"loss": 0.6592,
"step": 4641
},
{
"epoch": 0.29,
"grad_norm": 0.9141191244125366,
"learning_rate": 8.28008004871808e-06,
"loss": 0.5754,
"step": 4642
},
{
"epoch": 0.29,
"grad_norm": 0.8856356143951416,
"learning_rate": 8.279305614040337e-06,
"loss": 0.6057,
"step": 4643
},
{
"epoch": 0.29,
"grad_norm": 0.8992973566055298,
"learning_rate": 8.278531041282445e-06,
"loss": 0.5995,
"step": 4644
},
{
"epoch": 0.29,
"grad_norm": 0.9188979864120483,
"learning_rate": 8.277756330477013e-06,
"loss": 0.6201,
"step": 4645
},
{
"epoch": 0.29,
"grad_norm": 0.9234612584114075,
"learning_rate": 8.276981481656668e-06,
"loss": 0.6256,
"step": 4646
},
{
"epoch": 0.29,
"grad_norm": 0.9108220934867859,
"learning_rate": 8.276206494854029e-06,
"loss": 0.6551,
"step": 4647
},
{
"epoch": 0.29,
"grad_norm": 0.8664566874504089,
"learning_rate": 8.275431370101734e-06,
"loss": 0.5819,
"step": 4648
},
{
"epoch": 0.29,
"grad_norm": 0.9078052639961243,
"learning_rate": 8.274656107432418e-06,
"loss": 0.6217,
"step": 4649
},
{
"epoch": 0.29,
"grad_norm": 0.8800520896911621,
"learning_rate": 8.273880706878724e-06,
"loss": 0.6276,
"step": 4650
},
{
"epoch": 0.29,
"grad_norm": 0.977785587310791,
"learning_rate": 8.273105168473304e-06,
"loss": 0.5943,
"step": 4651
},
{
"epoch": 0.29,
"grad_norm": 0.9266806840896606,
"learning_rate": 8.27232949224881e-06,
"loss": 0.6371,
"step": 4652
},
{
"epoch": 0.29,
"grad_norm": 0.9237861037254333,
"learning_rate": 8.271553678237904e-06,
"loss": 0.6344,
"step": 4653
},
{
"epoch": 0.29,
"grad_norm": 0.9259735345840454,
"learning_rate": 8.270777726473256e-06,
"loss": 0.5926,
"step": 4654
},
{
"epoch": 0.29,
"grad_norm": 0.9366374611854553,
"learning_rate": 8.270001636987535e-06,
"loss": 0.5992,
"step": 4655
},
{
"epoch": 0.29,
"grad_norm": 0.9009166359901428,
"learning_rate": 8.26922540981342e-06,
"loss": 0.6138,
"step": 4656
},
{
"epoch": 0.3,
"grad_norm": 0.8974087834358215,
"learning_rate": 8.268449044983598e-06,
"loss": 0.5916,
"step": 4657
},
{
"epoch": 0.3,
"grad_norm": 0.8280764222145081,
"learning_rate": 8.267672542530753e-06,
"loss": 0.6036,
"step": 4658
},
{
"epoch": 0.3,
"grad_norm": 0.8438900709152222,
"learning_rate": 8.266895902487588e-06,
"loss": 0.5817,
"step": 4659
},
{
"epoch": 0.3,
"grad_norm": 0.8856135010719299,
"learning_rate": 8.2661191248868e-06,
"loss": 0.6245,
"step": 4660
},
{
"epoch": 0.3,
"grad_norm": 0.8787485957145691,
"learning_rate": 8.265342209761098e-06,
"loss": 0.6138,
"step": 4661
},
{
"epoch": 0.3,
"grad_norm": 0.8637370467185974,
"learning_rate": 8.264565157143194e-06,
"loss": 0.6444,
"step": 4662
},
{
"epoch": 0.3,
"grad_norm": 0.9093601107597351,
"learning_rate": 8.26378796706581e-06,
"loss": 0.6109,
"step": 4663
},
{
"epoch": 0.3,
"grad_norm": 0.8617517352104187,
"learning_rate": 8.263010639561666e-06,
"loss": 0.6166,
"step": 4664
},
{
"epoch": 0.3,
"grad_norm": 0.8573476076126099,
"learning_rate": 8.262233174663497e-06,
"loss": 0.5985,
"step": 4665
},
{
"epoch": 0.3,
"grad_norm": 0.9106038808822632,
"learning_rate": 8.261455572404036e-06,
"loss": 0.6184,
"step": 4666
},
{
"epoch": 0.3,
"grad_norm": 0.9015377163887024,
"learning_rate": 8.260677832816029e-06,
"loss": 0.5952,
"step": 4667
},
{
"epoch": 0.3,
"grad_norm": 0.8836144804954529,
"learning_rate": 8.259899955932218e-06,
"loss": 0.5964,
"step": 4668
},
{
"epoch": 0.3,
"grad_norm": 0.9044028520584106,
"learning_rate": 8.259121941785362e-06,
"loss": 0.626,
"step": 4669
},
{
"epoch": 0.3,
"grad_norm": 0.8526366353034973,
"learning_rate": 8.25834379040822e-06,
"loss": 0.5792,
"step": 4670
},
{
"epoch": 0.3,
"grad_norm": 0.8615383505821228,
"learning_rate": 8.257565501833555e-06,
"loss": 0.6326,
"step": 4671
},
{
"epoch": 0.3,
"grad_norm": 0.8761278986930847,
"learning_rate": 8.256787076094138e-06,
"loss": 0.5917,
"step": 4672
},
{
"epoch": 0.3,
"grad_norm": 0.9185283780097961,
"learning_rate": 8.256008513222747e-06,
"loss": 0.5846,
"step": 4673
},
{
"epoch": 0.3,
"grad_norm": 0.9208911061286926,
"learning_rate": 8.255229813252167e-06,
"loss": 0.6307,
"step": 4674
},
{
"epoch": 0.3,
"grad_norm": 0.8966217637062073,
"learning_rate": 8.25445097621518e-06,
"loss": 0.6079,
"step": 4675
},
{
"epoch": 0.3,
"grad_norm": 0.9059707522392273,
"learning_rate": 8.253672002144584e-06,
"loss": 0.5925,
"step": 4676
},
{
"epoch": 0.3,
"grad_norm": 0.8802112340927124,
"learning_rate": 8.25289289107318e-06,
"loss": 0.632,
"step": 4677
},
{
"epoch": 0.3,
"grad_norm": 0.8126215934753418,
"learning_rate": 8.252113643033774e-06,
"loss": 0.6072,
"step": 4678
},
{
"epoch": 0.3,
"grad_norm": 0.9150187969207764,
"learning_rate": 8.251334258059173e-06,
"loss": 0.6969,
"step": 4679
},
{
"epoch": 0.3,
"grad_norm": 0.8734495043754578,
"learning_rate": 8.250554736182199e-06,
"loss": 0.6336,
"step": 4680
},
{
"epoch": 0.3,
"grad_norm": 0.8438607454299927,
"learning_rate": 8.249775077435671e-06,
"loss": 0.6163,
"step": 4681
},
{
"epoch": 0.3,
"grad_norm": 0.9356812238693237,
"learning_rate": 8.24899528185242e-06,
"loss": 0.5518,
"step": 4682
},
{
"epoch": 0.3,
"grad_norm": 0.9281412959098816,
"learning_rate": 8.24821534946528e-06,
"loss": 0.6216,
"step": 4683
},
{
"epoch": 0.3,
"grad_norm": 0.897492527961731,
"learning_rate": 8.247435280307093e-06,
"loss": 0.6225,
"step": 4684
},
{
"epoch": 0.3,
"grad_norm": 0.8860996961593628,
"learning_rate": 8.246655074410703e-06,
"loss": 0.6648,
"step": 4685
},
{
"epoch": 0.3,
"grad_norm": 0.9270169138908386,
"learning_rate": 8.24587473180896e-06,
"loss": 0.6332,
"step": 4686
},
{
"epoch": 0.3,
"grad_norm": 0.9059301018714905,
"learning_rate": 8.245094252534727e-06,
"loss": 0.605,
"step": 4687
},
{
"epoch": 0.3,
"grad_norm": 0.9001350402832031,
"learning_rate": 8.244313636620862e-06,
"loss": 0.6293,
"step": 4688
},
{
"epoch": 0.3,
"grad_norm": 0.9218695759773254,
"learning_rate": 8.243532884100236e-06,
"loss": 0.6064,
"step": 4689
},
{
"epoch": 0.3,
"grad_norm": 0.8904708027839661,
"learning_rate": 8.242751995005721e-06,
"loss": 0.6136,
"step": 4690
},
{
"epoch": 0.3,
"grad_norm": 0.8279531002044678,
"learning_rate": 8.241970969370205e-06,
"loss": 0.5799,
"step": 4691
},
{
"epoch": 0.3,
"grad_norm": 0.887289822101593,
"learning_rate": 8.241189807226566e-06,
"loss": 0.6134,
"step": 4692
},
{
"epoch": 0.3,
"grad_norm": 0.9113506078720093,
"learning_rate": 8.240408508607703e-06,
"loss": 0.6129,
"step": 4693
},
{
"epoch": 0.3,
"grad_norm": 0.9350869655609131,
"learning_rate": 8.239627073546507e-06,
"loss": 0.6008,
"step": 4694
},
{
"epoch": 0.3,
"grad_norm": 0.8350124955177307,
"learning_rate": 8.238845502075886e-06,
"loss": 0.6183,
"step": 4695
},
{
"epoch": 0.3,
"grad_norm": 0.8774599432945251,
"learning_rate": 8.238063794228748e-06,
"loss": 0.5686,
"step": 4696
},
{
"epoch": 0.3,
"grad_norm": 0.8577974438667297,
"learning_rate": 8.237281950038008e-06,
"loss": 0.686,
"step": 4697
},
{
"epoch": 0.3,
"grad_norm": 0.8853060603141785,
"learning_rate": 8.236499969536585e-06,
"loss": 0.6389,
"step": 4698
},
{
"epoch": 0.3,
"grad_norm": 0.8757472038269043,
"learning_rate": 8.23571785275741e-06,
"loss": 0.6098,
"step": 4699
},
{
"epoch": 0.3,
"grad_norm": 0.9388381242752075,
"learning_rate": 8.234935599733412e-06,
"loss": 0.6703,
"step": 4700
},
{
"epoch": 0.3,
"grad_norm": 0.9161108136177063,
"learning_rate": 8.234153210497528e-06,
"loss": 0.5443,
"step": 4701
},
{
"epoch": 0.3,
"grad_norm": 0.8968355059623718,
"learning_rate": 8.233370685082704e-06,
"loss": 0.6287,
"step": 4702
},
{
"epoch": 0.3,
"grad_norm": 0.8884924650192261,
"learning_rate": 8.232588023521888e-06,
"loss": 0.5913,
"step": 4703
},
{
"epoch": 0.3,
"grad_norm": 0.8946593999862671,
"learning_rate": 8.231805225848035e-06,
"loss": 0.6407,
"step": 4704
},
{
"epoch": 0.3,
"grad_norm": 0.8582884669303894,
"learning_rate": 8.23102229209411e-06,
"loss": 0.5737,
"step": 4705
},
{
"epoch": 0.3,
"grad_norm": 0.9519075155258179,
"learning_rate": 8.230239222293073e-06,
"loss": 0.5501,
"step": 4706
},
{
"epoch": 0.3,
"grad_norm": 0.9213956594467163,
"learning_rate": 8.229456016477899e-06,
"loss": 0.5993,
"step": 4707
},
{
"epoch": 0.3,
"grad_norm": 0.9476253390312195,
"learning_rate": 8.228672674681568e-06,
"loss": 0.6097,
"step": 4708
},
{
"epoch": 0.3,
"grad_norm": 0.9826415181159973,
"learning_rate": 8.227889196937062e-06,
"loss": 0.697,
"step": 4709
},
{
"epoch": 0.3,
"grad_norm": 0.9420339465141296,
"learning_rate": 8.227105583277372e-06,
"loss": 0.5592,
"step": 4710
},
{
"epoch": 0.3,
"grad_norm": 0.8203204870223999,
"learning_rate": 8.22632183373549e-06,
"loss": 0.6001,
"step": 4711
},
{
"epoch": 0.3,
"grad_norm": 0.8779041171073914,
"learning_rate": 8.225537948344423e-06,
"loss": 0.6033,
"step": 4712
},
{
"epoch": 0.3,
"grad_norm": 0.8583627343177795,
"learning_rate": 8.224753927137171e-06,
"loss": 0.642,
"step": 4713
},
{
"epoch": 0.3,
"grad_norm": 0.9116830825805664,
"learning_rate": 8.22396977014675e-06,
"loss": 0.6005,
"step": 4714
},
{
"epoch": 0.3,
"grad_norm": 0.9073758125305176,
"learning_rate": 8.223185477406175e-06,
"loss": 0.6414,
"step": 4715
},
{
"epoch": 0.3,
"grad_norm": 0.9207981824874878,
"learning_rate": 8.222401048948476e-06,
"loss": 0.6138,
"step": 4716
},
{
"epoch": 0.3,
"grad_norm": 0.8661931753158569,
"learning_rate": 8.221616484806676e-06,
"loss": 0.6059,
"step": 4717
},
{
"epoch": 0.3,
"grad_norm": 0.8917931318283081,
"learning_rate": 8.220831785013814e-06,
"loss": 0.613,
"step": 4718
},
{
"epoch": 0.3,
"grad_norm": 0.8399578928947449,
"learning_rate": 8.22004694960293e-06,
"loss": 0.5698,
"step": 4719
},
{
"epoch": 0.3,
"grad_norm": 0.8338463306427002,
"learning_rate": 8.21926197860707e-06,
"loss": 0.5719,
"step": 4720
},
{
"epoch": 0.3,
"grad_norm": 0.9184006452560425,
"learning_rate": 8.218476872059288e-06,
"loss": 0.5921,
"step": 4721
},
{
"epoch": 0.3,
"grad_norm": 0.9487320780754089,
"learning_rate": 8.217691629992641e-06,
"loss": 0.6159,
"step": 4722
},
{
"epoch": 0.3,
"grad_norm": 0.8477067351341248,
"learning_rate": 8.216906252440193e-06,
"loss": 0.5805,
"step": 4723
},
{
"epoch": 0.3,
"grad_norm": 0.878990650177002,
"learning_rate": 8.216120739435013e-06,
"loss": 0.6369,
"step": 4724
},
{
"epoch": 0.3,
"grad_norm": 0.8777364492416382,
"learning_rate": 8.215335091010177e-06,
"loss": 0.5752,
"step": 4725
},
{
"epoch": 0.3,
"grad_norm": 0.8827346563339233,
"learning_rate": 8.214549307198765e-06,
"loss": 0.5918,
"step": 4726
},
{
"epoch": 0.3,
"grad_norm": 0.9704633355140686,
"learning_rate": 8.213763388033867e-06,
"loss": 0.6544,
"step": 4727
},
{
"epoch": 0.3,
"grad_norm": 0.946010172367096,
"learning_rate": 8.212977333548569e-06,
"loss": 0.6527,
"step": 4728
},
{
"epoch": 0.3,
"grad_norm": 0.9947195053100586,
"learning_rate": 8.212191143775973e-06,
"loss": 0.6222,
"step": 4729
},
{
"epoch": 0.3,
"grad_norm": 0.9032514691352844,
"learning_rate": 8.211404818749184e-06,
"loss": 0.6712,
"step": 4730
},
{
"epoch": 0.3,
"grad_norm": 0.8361782431602478,
"learning_rate": 8.21061835850131e-06,
"loss": 0.5807,
"step": 4731
},
{
"epoch": 0.3,
"grad_norm": 0.8890867233276367,
"learning_rate": 8.209831763065465e-06,
"loss": 0.6234,
"step": 4732
},
{
"epoch": 0.3,
"grad_norm": 0.9419566988945007,
"learning_rate": 8.209045032474773e-06,
"loss": 0.5666,
"step": 4733
},
{
"epoch": 0.3,
"grad_norm": 0.8726335763931274,
"learning_rate": 8.208258166762355e-06,
"loss": 0.5848,
"step": 4734
},
{
"epoch": 0.3,
"grad_norm": 0.8878278136253357,
"learning_rate": 8.207471165961347e-06,
"loss": 0.6069,
"step": 4735
},
{
"epoch": 0.3,
"grad_norm": 0.8903132081031799,
"learning_rate": 8.206684030104886e-06,
"loss": 0.6338,
"step": 4736
},
{
"epoch": 0.3,
"grad_norm": 0.8635721206665039,
"learning_rate": 8.205896759226115e-06,
"loss": 0.6088,
"step": 4737
},
{
"epoch": 0.3,
"grad_norm": 0.8765946626663208,
"learning_rate": 8.205109353358186e-06,
"loss": 0.637,
"step": 4738
},
{
"epoch": 0.3,
"grad_norm": 0.8656042218208313,
"learning_rate": 8.20432181253425e-06,
"loss": 0.5851,
"step": 4739
},
{
"epoch": 0.3,
"grad_norm": 0.8711687922477722,
"learning_rate": 8.203534136787473e-06,
"loss": 0.5607,
"step": 4740
},
{
"epoch": 0.3,
"grad_norm": 0.8796273469924927,
"learning_rate": 8.202746326151015e-06,
"loss": 0.5778,
"step": 4741
},
{
"epoch": 0.3,
"grad_norm": 0.8987690806388855,
"learning_rate": 8.20195838065805e-06,
"loss": 0.6896,
"step": 4742
},
{
"epoch": 0.3,
"grad_norm": 0.9169846177101135,
"learning_rate": 8.201170300341757e-06,
"loss": 0.6126,
"step": 4743
},
{
"epoch": 0.3,
"grad_norm": 0.8620352149009705,
"learning_rate": 8.20038208523532e-06,
"loss": 0.6509,
"step": 4744
},
{
"epoch": 0.3,
"grad_norm": 0.9679823517799377,
"learning_rate": 8.199593735371924e-06,
"loss": 0.6318,
"step": 4745
},
{
"epoch": 0.3,
"grad_norm": 0.8961201310157776,
"learning_rate": 8.198805250784769e-06,
"loss": 0.6057,
"step": 4746
},
{
"epoch": 0.3,
"grad_norm": 0.8943774700164795,
"learning_rate": 8.198016631507053e-06,
"loss": 0.5722,
"step": 4747
},
{
"epoch": 0.3,
"grad_norm": 0.9439212679862976,
"learning_rate": 8.19722787757198e-06,
"loss": 0.5788,
"step": 4748
},
{
"epoch": 0.3,
"grad_norm": 0.929137647151947,
"learning_rate": 8.196438989012765e-06,
"loss": 0.5959,
"step": 4749
},
{
"epoch": 0.3,
"grad_norm": 0.8944662809371948,
"learning_rate": 8.195649965862622e-06,
"loss": 0.5926,
"step": 4750
},
{
"epoch": 0.3,
"grad_norm": 0.910987138748169,
"learning_rate": 8.194860808154778e-06,
"loss": 0.5805,
"step": 4751
},
{
"epoch": 0.3,
"grad_norm": 0.8900378346443176,
"learning_rate": 8.194071515922456e-06,
"loss": 0.5787,
"step": 4752
},
{
"epoch": 0.3,
"grad_norm": 0.9534246325492859,
"learning_rate": 8.193282089198897e-06,
"loss": 0.6274,
"step": 4753
},
{
"epoch": 0.3,
"grad_norm": 0.9117621779441833,
"learning_rate": 8.192492528017337e-06,
"loss": 0.6155,
"step": 4754
},
{
"epoch": 0.3,
"grad_norm": 0.830488383769989,
"learning_rate": 8.191702832411023e-06,
"loss": 0.5826,
"step": 4755
},
{
"epoch": 0.3,
"grad_norm": 0.9440089464187622,
"learning_rate": 8.190913002413204e-06,
"loss": 0.652,
"step": 4756
},
{
"epoch": 0.3,
"grad_norm": 0.9013427495956421,
"learning_rate": 8.19012303805714e-06,
"loss": 0.6266,
"step": 4757
},
{
"epoch": 0.3,
"grad_norm": 0.8950908184051514,
"learning_rate": 8.189332939376092e-06,
"loss": 0.6494,
"step": 4758
},
{
"epoch": 0.3,
"grad_norm": 0.8194960951805115,
"learning_rate": 8.188542706403331e-06,
"loss": 0.5597,
"step": 4759
},
{
"epoch": 0.3,
"grad_norm": 0.8966452479362488,
"learning_rate": 8.187752339172126e-06,
"loss": 0.5973,
"step": 4760
},
{
"epoch": 0.3,
"grad_norm": 0.8810895681381226,
"learning_rate": 8.18696183771576e-06,
"loss": 0.6343,
"step": 4761
},
{
"epoch": 0.3,
"grad_norm": 0.9110032916069031,
"learning_rate": 8.18617120206752e-06,
"loss": 0.6262,
"step": 4762
},
{
"epoch": 0.3,
"grad_norm": 0.8125797510147095,
"learning_rate": 8.185380432260693e-06,
"loss": 0.5832,
"step": 4763
},
{
"epoch": 0.3,
"grad_norm": 0.9191034436225891,
"learning_rate": 8.184589528328576e-06,
"loss": 0.6396,
"step": 4764
},
{
"epoch": 0.3,
"grad_norm": 0.910497784614563,
"learning_rate": 8.183798490304473e-06,
"loss": 0.6085,
"step": 4765
},
{
"epoch": 0.3,
"grad_norm": 0.9907393455505371,
"learning_rate": 8.183007318221691e-06,
"loss": 0.6692,
"step": 4766
},
{
"epoch": 0.3,
"grad_norm": 0.897619903087616,
"learning_rate": 8.182216012113543e-06,
"loss": 0.5841,
"step": 4767
},
{
"epoch": 0.3,
"grad_norm": 0.8304966688156128,
"learning_rate": 8.181424572013352e-06,
"loss": 0.5711,
"step": 4768
},
{
"epoch": 0.3,
"grad_norm": 0.8721338510513306,
"learning_rate": 8.180632997954437e-06,
"loss": 0.5515,
"step": 4769
},
{
"epoch": 0.3,
"grad_norm": 0.9142031073570251,
"learning_rate": 8.179841289970132e-06,
"loss": 0.6018,
"step": 4770
},
{
"epoch": 0.3,
"grad_norm": 0.8813204765319824,
"learning_rate": 8.179049448093771e-06,
"loss": 0.6077,
"step": 4771
},
{
"epoch": 0.3,
"grad_norm": 0.8773213624954224,
"learning_rate": 8.178257472358697e-06,
"loss": 0.6325,
"step": 4772
},
{
"epoch": 0.3,
"grad_norm": 0.936450183391571,
"learning_rate": 8.177465362798259e-06,
"loss": 0.6321,
"step": 4773
},
{
"epoch": 0.3,
"grad_norm": 0.898252010345459,
"learning_rate": 8.176673119445807e-06,
"loss": 0.6171,
"step": 4774
},
{
"epoch": 0.3,
"grad_norm": 0.8829185366630554,
"learning_rate": 8.1758807423347e-06,
"loss": 0.6601,
"step": 4775
},
{
"epoch": 0.3,
"grad_norm": 0.8611942529678345,
"learning_rate": 8.175088231498304e-06,
"loss": 0.6078,
"step": 4776
},
{
"epoch": 0.3,
"grad_norm": 0.9188866019248962,
"learning_rate": 8.174295586969987e-06,
"loss": 0.5632,
"step": 4777
},
{
"epoch": 0.3,
"grad_norm": 0.9207391142845154,
"learning_rate": 8.173502808783127e-06,
"loss": 0.6337,
"step": 4778
},
{
"epoch": 0.3,
"grad_norm": 0.8784085512161255,
"learning_rate": 8.172709896971103e-06,
"loss": 0.5728,
"step": 4779
},
{
"epoch": 0.3,
"grad_norm": 0.9227593541145325,
"learning_rate": 8.1719168515673e-06,
"loss": 0.6868,
"step": 4780
},
{
"epoch": 0.3,
"grad_norm": 0.9259268045425415,
"learning_rate": 8.171123672605116e-06,
"loss": 0.6095,
"step": 4781
},
{
"epoch": 0.3,
"grad_norm": 0.8867712020874023,
"learning_rate": 8.170330360117944e-06,
"loss": 0.6493,
"step": 4782
},
{
"epoch": 0.3,
"grad_norm": 0.9128400087356567,
"learning_rate": 8.169536914139189e-06,
"loss": 0.5594,
"step": 4783
},
{
"epoch": 0.3,
"grad_norm": 0.9003540873527527,
"learning_rate": 8.168743334702262e-06,
"loss": 0.5926,
"step": 4784
},
{
"epoch": 0.3,
"grad_norm": 0.9833676218986511,
"learning_rate": 8.167949621840576e-06,
"loss": 0.621,
"step": 4785
},
{
"epoch": 0.3,
"grad_norm": 0.865376889705658,
"learning_rate": 8.16715577558755e-06,
"loss": 0.5874,
"step": 4786
},
{
"epoch": 0.3,
"grad_norm": 0.8381644487380981,
"learning_rate": 8.166361795976614e-06,
"loss": 0.6214,
"step": 4787
},
{
"epoch": 0.3,
"grad_norm": 0.9137545228004456,
"learning_rate": 8.165567683041197e-06,
"loss": 0.6128,
"step": 4788
},
{
"epoch": 0.3,
"grad_norm": 0.8159583210945129,
"learning_rate": 8.164773436814736e-06,
"loss": 0.5778,
"step": 4789
},
{
"epoch": 0.3,
"grad_norm": 0.9791309833526611,
"learning_rate": 8.163979057330677e-06,
"loss": 0.598,
"step": 4790
},
{
"epoch": 0.3,
"grad_norm": 0.8282786011695862,
"learning_rate": 8.163184544622467e-06,
"loss": 0.4893,
"step": 4791
},
{
"epoch": 0.3,
"grad_norm": 0.9099088907241821,
"learning_rate": 8.162389898723558e-06,
"loss": 0.6035,
"step": 4792
},
{
"epoch": 0.3,
"grad_norm": 0.8515049815177917,
"learning_rate": 8.161595119667413e-06,
"loss": 0.582,
"step": 4793
},
{
"epoch": 0.3,
"grad_norm": 0.9349850416183472,
"learning_rate": 8.160800207487495e-06,
"loss": 0.6135,
"step": 4794
},
{
"epoch": 0.3,
"grad_norm": 0.848458468914032,
"learning_rate": 8.160005162217275e-06,
"loss": 0.6004,
"step": 4795
},
{
"epoch": 0.3,
"grad_norm": 0.8345276117324829,
"learning_rate": 8.159209983890232e-06,
"loss": 0.5956,
"step": 4796
},
{
"epoch": 0.3,
"grad_norm": 0.8990775942802429,
"learning_rate": 8.158414672539845e-06,
"loss": 0.6247,
"step": 4797
},
{
"epoch": 0.3,
"grad_norm": 0.8795309662818909,
"learning_rate": 8.157619228199605e-06,
"loss": 0.5965,
"step": 4798
},
{
"epoch": 0.3,
"grad_norm": 0.9947782158851624,
"learning_rate": 8.156823650903003e-06,
"loss": 0.6742,
"step": 4799
},
{
"epoch": 0.3,
"grad_norm": 0.8884429931640625,
"learning_rate": 8.156027940683539e-06,
"loss": 0.5757,
"step": 4800
},
{
"epoch": 0.3,
"grad_norm": 0.888424277305603,
"learning_rate": 8.15523209757472e-06,
"loss": 0.6723,
"step": 4801
},
{
"epoch": 0.3,
"grad_norm": 0.9217067360877991,
"learning_rate": 8.15443612161005e-06,
"loss": 0.6168,
"step": 4802
},
{
"epoch": 0.3,
"grad_norm": 0.8976277709007263,
"learning_rate": 8.15364001282305e-06,
"loss": 0.6127,
"step": 4803
},
{
"epoch": 0.3,
"grad_norm": 0.8980615735054016,
"learning_rate": 8.15284377124724e-06,
"loss": 0.5577,
"step": 4804
},
{
"epoch": 0.3,
"grad_norm": 0.8790192008018494,
"learning_rate": 8.152047396916145e-06,
"loss": 0.5193,
"step": 4805
},
{
"epoch": 0.3,
"grad_norm": 0.9707584977149963,
"learning_rate": 8.1512508898633e-06,
"loss": 0.6459,
"step": 4806
},
{
"epoch": 0.3,
"grad_norm": 0.8137477040290833,
"learning_rate": 8.150454250122245e-06,
"loss": 0.585,
"step": 4807
},
{
"epoch": 0.3,
"grad_norm": 0.8875191807746887,
"learning_rate": 8.149657477726518e-06,
"loss": 0.581,
"step": 4808
},
{
"epoch": 0.3,
"grad_norm": 0.8823238611221313,
"learning_rate": 8.148860572709674e-06,
"loss": 0.6095,
"step": 4809
},
{
"epoch": 0.3,
"grad_norm": 0.8552770018577576,
"learning_rate": 8.148063535105261e-06,
"loss": 0.596,
"step": 4810
},
{
"epoch": 0.3,
"grad_norm": 0.9799924492835999,
"learning_rate": 8.147266364946848e-06,
"loss": 0.6214,
"step": 4811
},
{
"epoch": 0.3,
"grad_norm": 0.9276837706565857,
"learning_rate": 8.146469062267995e-06,
"loss": 0.6281,
"step": 4812
},
{
"epoch": 0.3,
"grad_norm": 0.9394620656967163,
"learning_rate": 8.145671627102277e-06,
"loss": 0.606,
"step": 4813
},
{
"epoch": 0.3,
"grad_norm": 0.8922251462936401,
"learning_rate": 8.14487405948327e-06,
"loss": 0.5859,
"step": 4814
},
{
"epoch": 0.31,
"grad_norm": 0.8474637866020203,
"learning_rate": 8.144076359444555e-06,
"loss": 0.5582,
"step": 4815
},
{
"epoch": 0.31,
"grad_norm": 0.9072783589363098,
"learning_rate": 8.143278527019722e-06,
"loss": 0.6062,
"step": 4816
},
{
"epoch": 0.31,
"grad_norm": 0.9250416159629822,
"learning_rate": 8.142480562242365e-06,
"loss": 0.6103,
"step": 4817
},
{
"epoch": 0.31,
"grad_norm": 0.9013091325759888,
"learning_rate": 8.141682465146084e-06,
"loss": 0.6005,
"step": 4818
},
{
"epoch": 0.31,
"grad_norm": 0.8904187083244324,
"learning_rate": 8.140884235764484e-06,
"loss": 0.6026,
"step": 4819
},
{
"epoch": 0.31,
"grad_norm": 0.884691596031189,
"learning_rate": 8.140085874131174e-06,
"loss": 0.5996,
"step": 4820
},
{
"epoch": 0.31,
"grad_norm": 0.8515493869781494,
"learning_rate": 8.139287380279773e-06,
"loss": 0.6056,
"step": 4821
},
{
"epoch": 0.31,
"grad_norm": 0.9234597086906433,
"learning_rate": 8.138488754243899e-06,
"loss": 0.6553,
"step": 4822
},
{
"epoch": 0.31,
"grad_norm": 0.9508641958236694,
"learning_rate": 8.137689996057183e-06,
"loss": 0.6402,
"step": 4823
},
{
"epoch": 0.31,
"grad_norm": 0.8529036641120911,
"learning_rate": 8.136891105753258e-06,
"loss": 0.5396,
"step": 4824
},
{
"epoch": 0.31,
"grad_norm": 0.888896107673645,
"learning_rate": 8.136092083365758e-06,
"loss": 0.6194,
"step": 4825
},
{
"epoch": 0.31,
"grad_norm": 0.8955714106559753,
"learning_rate": 8.13529292892833e-06,
"loss": 0.6351,
"step": 4826
},
{
"epoch": 0.31,
"grad_norm": 0.89292311668396,
"learning_rate": 8.134493642474625e-06,
"loss": 0.6317,
"step": 4827
},
{
"epoch": 0.31,
"grad_norm": 0.8899608850479126,
"learning_rate": 8.133694224038297e-06,
"loss": 0.5935,
"step": 4828
},
{
"epoch": 0.31,
"grad_norm": 0.8758254051208496,
"learning_rate": 8.132894673653007e-06,
"loss": 0.6203,
"step": 4829
},
{
"epoch": 0.31,
"grad_norm": 0.8891094326972961,
"learning_rate": 8.13209499135242e-06,
"loss": 0.5985,
"step": 4830
},
{
"epoch": 0.31,
"grad_norm": 0.9486083984375,
"learning_rate": 8.131295177170208e-06,
"loss": 0.6333,
"step": 4831
},
{
"epoch": 0.31,
"grad_norm": 0.9144448041915894,
"learning_rate": 8.13049523114005e-06,
"loss": 0.6241,
"step": 4832
},
{
"epoch": 0.31,
"grad_norm": 0.8380624651908875,
"learning_rate": 8.129695153295627e-06,
"loss": 0.5578,
"step": 4833
},
{
"epoch": 0.31,
"grad_norm": 0.8630735278129578,
"learning_rate": 8.128894943670631e-06,
"loss": 0.5934,
"step": 4834
},
{
"epoch": 0.31,
"grad_norm": 0.8654153943061829,
"learning_rate": 8.12809460229875e-06,
"loss": 0.5767,
"step": 4835
},
{
"epoch": 0.31,
"grad_norm": 0.8959805369377136,
"learning_rate": 8.127294129213691e-06,
"loss": 0.6212,
"step": 4836
},
{
"epoch": 0.31,
"grad_norm": 0.8940380215644836,
"learning_rate": 8.126493524449153e-06,
"loss": 0.6261,
"step": 4837
},
{
"epoch": 0.31,
"grad_norm": 0.946277916431427,
"learning_rate": 8.12569278803885e-06,
"loss": 0.617,
"step": 4838
},
{
"epoch": 0.31,
"grad_norm": 0.9186848998069763,
"learning_rate": 8.124891920016495e-06,
"loss": 0.6387,
"step": 4839
},
{
"epoch": 0.31,
"grad_norm": 0.9961265325546265,
"learning_rate": 8.124090920415814e-06,
"loss": 0.6452,
"step": 4840
},
{
"epoch": 0.31,
"grad_norm": 0.8689594268798828,
"learning_rate": 8.123289789270532e-06,
"loss": 0.6492,
"step": 4841
},
{
"epoch": 0.31,
"grad_norm": 0.9431710243225098,
"learning_rate": 8.12248852661438e-06,
"loss": 0.639,
"step": 4842
},
{
"epoch": 0.31,
"grad_norm": 0.8190953731536865,
"learning_rate": 8.121687132481101e-06,
"loss": 0.5752,
"step": 4843
},
{
"epoch": 0.31,
"grad_norm": 0.828509509563446,
"learning_rate": 8.120885606904436e-06,
"loss": 0.5913,
"step": 4844
},
{
"epoch": 0.31,
"grad_norm": 0.8336859345436096,
"learning_rate": 8.120083949918137e-06,
"loss": 0.5577,
"step": 4845
},
{
"epoch": 0.31,
"grad_norm": 0.8060721158981323,
"learning_rate": 8.119282161555952e-06,
"loss": 0.5642,
"step": 4846
},
{
"epoch": 0.31,
"grad_norm": 0.9661688208580017,
"learning_rate": 8.11848024185165e-06,
"loss": 0.6525,
"step": 4847
},
{
"epoch": 0.31,
"grad_norm": 0.8893968462944031,
"learning_rate": 8.117678190838991e-06,
"loss": 0.6042,
"step": 4848
},
{
"epoch": 0.31,
"grad_norm": 0.8722717761993408,
"learning_rate": 8.116876008551751e-06,
"loss": 0.5784,
"step": 4849
},
{
"epoch": 0.31,
"grad_norm": 0.8579627275466919,
"learning_rate": 8.116073695023704e-06,
"loss": 0.6056,
"step": 4850
},
{
"epoch": 0.31,
"grad_norm": 0.9025737643241882,
"learning_rate": 8.115271250288635e-06,
"loss": 0.5785,
"step": 4851
},
{
"epoch": 0.31,
"grad_norm": 0.9787115454673767,
"learning_rate": 8.114468674380328e-06,
"loss": 0.6641,
"step": 4852
},
{
"epoch": 0.31,
"grad_norm": 0.9400716423988342,
"learning_rate": 8.113665967332582e-06,
"loss": 0.604,
"step": 4853
},
{
"epoch": 0.31,
"grad_norm": 0.8718861937522888,
"learning_rate": 8.112863129179194e-06,
"loss": 0.644,
"step": 4854
},
{
"epoch": 0.31,
"grad_norm": 0.9167654514312744,
"learning_rate": 8.112060159953966e-06,
"loss": 0.6431,
"step": 4855
},
{
"epoch": 0.31,
"grad_norm": 0.9498146176338196,
"learning_rate": 8.111257059690714e-06,
"loss": 0.6549,
"step": 4856
},
{
"epoch": 0.31,
"grad_norm": 0.9236502051353455,
"learning_rate": 8.110453828423248e-06,
"loss": 0.6302,
"step": 4857
},
{
"epoch": 0.31,
"grad_norm": 0.9004657864570618,
"learning_rate": 8.109650466185394e-06,
"loss": 0.6053,
"step": 4858
},
{
"epoch": 0.31,
"grad_norm": 0.8638118505477905,
"learning_rate": 8.108846973010975e-06,
"loss": 0.558,
"step": 4859
},
{
"epoch": 0.31,
"grad_norm": 0.8778232932090759,
"learning_rate": 8.108043348933825e-06,
"loss": 0.626,
"step": 4860
},
{
"epoch": 0.31,
"grad_norm": 0.9175794720649719,
"learning_rate": 8.107239593987781e-06,
"loss": 0.5887,
"step": 4861
},
{
"epoch": 0.31,
"grad_norm": 0.9042779207229614,
"learning_rate": 8.10643570820669e-06,
"loss": 0.6235,
"step": 4862
},
{
"epoch": 0.31,
"grad_norm": 0.9080462455749512,
"learning_rate": 8.105631691624394e-06,
"loss": 0.6401,
"step": 4863
},
{
"epoch": 0.31,
"grad_norm": 0.9129647016525269,
"learning_rate": 8.104827544274754e-06,
"loss": 0.7069,
"step": 4864
},
{
"epoch": 0.31,
"grad_norm": 0.876524806022644,
"learning_rate": 8.104023266191625e-06,
"loss": 0.5799,
"step": 4865
},
{
"epoch": 0.31,
"grad_norm": 0.9011818766593933,
"learning_rate": 8.103218857408875e-06,
"loss": 0.6213,
"step": 4866
},
{
"epoch": 0.31,
"grad_norm": 0.9621694684028625,
"learning_rate": 8.102414317960373e-06,
"loss": 0.6267,
"step": 4867
},
{
"epoch": 0.31,
"grad_norm": 0.883078932762146,
"learning_rate": 8.10160964788e-06,
"loss": 0.5822,
"step": 4868
},
{
"epoch": 0.31,
"grad_norm": 0.9894744157791138,
"learning_rate": 8.100804847201632e-06,
"loss": 0.6117,
"step": 4869
},
{
"epoch": 0.31,
"grad_norm": 0.9744158983230591,
"learning_rate": 8.09999991595916e-06,
"loss": 0.5913,
"step": 4870
},
{
"epoch": 0.31,
"grad_norm": 0.9933215379714966,
"learning_rate": 8.099194854186475e-06,
"loss": 0.6645,
"step": 4871
},
{
"epoch": 0.31,
"grad_norm": 0.8537378907203674,
"learning_rate": 8.098389661917475e-06,
"loss": 0.5375,
"step": 4872
},
{
"epoch": 0.31,
"grad_norm": 0.9022486209869385,
"learning_rate": 8.097584339186066e-06,
"loss": 0.5917,
"step": 4873
},
{
"epoch": 0.31,
"grad_norm": 0.8206536173820496,
"learning_rate": 8.096778886026155e-06,
"loss": 0.6281,
"step": 4874
},
{
"epoch": 0.31,
"grad_norm": 0.9026719927787781,
"learning_rate": 8.09597330247166e-06,
"loss": 0.5709,
"step": 4875
},
{
"epoch": 0.31,
"grad_norm": 0.8792065978050232,
"learning_rate": 8.095167588556498e-06,
"loss": 0.6622,
"step": 4876
},
{
"epoch": 0.31,
"grad_norm": 0.938779890537262,
"learning_rate": 8.094361744314597e-06,
"loss": 0.6345,
"step": 4877
},
{
"epoch": 0.31,
"grad_norm": 0.84425288438797,
"learning_rate": 8.093555769779887e-06,
"loss": 0.5552,
"step": 4878
},
{
"epoch": 0.31,
"grad_norm": 0.9458581209182739,
"learning_rate": 8.092749664986304e-06,
"loss": 0.639,
"step": 4879
},
{
"epoch": 0.31,
"grad_norm": 0.8601045608520508,
"learning_rate": 8.091943429967792e-06,
"loss": 0.5757,
"step": 4880
},
{
"epoch": 0.31,
"grad_norm": 0.9777496457099915,
"learning_rate": 8.0911370647583e-06,
"loss": 0.6873,
"step": 4881
},
{
"epoch": 0.31,
"grad_norm": 0.830226480960846,
"learning_rate": 8.090330569391778e-06,
"loss": 0.5941,
"step": 4882
},
{
"epoch": 0.31,
"grad_norm": 0.9674537777900696,
"learning_rate": 8.089523943902187e-06,
"loss": 0.6356,
"step": 4883
},
{
"epoch": 0.31,
"grad_norm": 0.9554563164710999,
"learning_rate": 8.088717188323489e-06,
"loss": 0.6701,
"step": 4884
},
{
"epoch": 0.31,
"grad_norm": 0.8986421823501587,
"learning_rate": 8.087910302689656e-06,
"loss": 0.6009,
"step": 4885
},
{
"epoch": 0.31,
"grad_norm": 0.8413382172584534,
"learning_rate": 8.087103287034664e-06,
"loss": 0.6183,
"step": 4886
},
{
"epoch": 0.31,
"grad_norm": 0.8769293427467346,
"learning_rate": 8.086296141392489e-06,
"loss": 0.5598,
"step": 4887
},
{
"epoch": 0.31,
"grad_norm": 0.8853359818458557,
"learning_rate": 8.08548886579712e-06,
"loss": 0.6184,
"step": 4888
},
{
"epoch": 0.31,
"grad_norm": 0.9408413171768188,
"learning_rate": 8.08468146028255e-06,
"loss": 0.5866,
"step": 4889
},
{
"epoch": 0.31,
"grad_norm": 0.8464492559432983,
"learning_rate": 8.083873924882775e-06,
"loss": 0.5744,
"step": 4890
},
{
"epoch": 0.31,
"grad_norm": 0.9093937873840332,
"learning_rate": 8.083066259631796e-06,
"loss": 0.6079,
"step": 4891
},
{
"epoch": 0.31,
"grad_norm": 0.9300260543823242,
"learning_rate": 8.082258464563621e-06,
"loss": 0.6214,
"step": 4892
},
{
"epoch": 0.31,
"grad_norm": 0.8799288272857666,
"learning_rate": 8.081450539712266e-06,
"loss": 0.5775,
"step": 4893
},
{
"epoch": 0.31,
"grad_norm": 0.9392613768577576,
"learning_rate": 8.080642485111747e-06,
"loss": 0.5812,
"step": 4894
},
{
"epoch": 0.31,
"grad_norm": 0.8481096625328064,
"learning_rate": 8.07983430079609e-06,
"loss": 0.5847,
"step": 4895
},
{
"epoch": 0.31,
"grad_norm": 0.8689022660255432,
"learning_rate": 8.079025986799326e-06,
"loss": 0.5681,
"step": 4896
},
{
"epoch": 0.31,
"grad_norm": 0.8942854404449463,
"learning_rate": 8.078217543155488e-06,
"loss": 0.5555,
"step": 4897
},
{
"epoch": 0.31,
"grad_norm": 0.9350181818008423,
"learning_rate": 8.077408969898619e-06,
"loss": 0.6732,
"step": 4898
},
{
"epoch": 0.31,
"grad_norm": 0.8985404372215271,
"learning_rate": 8.076600267062761e-06,
"loss": 0.6312,
"step": 4899
},
{
"epoch": 0.31,
"grad_norm": 0.9145780801773071,
"learning_rate": 8.07579143468197e-06,
"loss": 0.6166,
"step": 4900
},
{
"epoch": 0.31,
"grad_norm": 0.8609732389450073,
"learning_rate": 8.074982472790302e-06,
"loss": 0.5519,
"step": 4901
},
{
"epoch": 0.31,
"grad_norm": 0.9401060938835144,
"learning_rate": 8.074173381421819e-06,
"loss": 0.6135,
"step": 4902
},
{
"epoch": 0.31,
"grad_norm": 0.8980786800384521,
"learning_rate": 8.073364160610589e-06,
"loss": 0.578,
"step": 4903
},
{
"epoch": 0.31,
"grad_norm": 0.8506133556365967,
"learning_rate": 8.072554810390685e-06,
"loss": 0.5842,
"step": 4904
},
{
"epoch": 0.31,
"grad_norm": 0.9556955099105835,
"learning_rate": 8.071745330796187e-06,
"loss": 0.6877,
"step": 4905
},
{
"epoch": 0.31,
"grad_norm": 0.8503575921058655,
"learning_rate": 8.070935721861178e-06,
"loss": 0.5922,
"step": 4906
},
{
"epoch": 0.31,
"grad_norm": 0.8888681530952454,
"learning_rate": 8.07012598361975e-06,
"loss": 0.6101,
"step": 4907
},
{
"epoch": 0.31,
"grad_norm": 0.8827106952667236,
"learning_rate": 8.069316116105996e-06,
"loss": 0.6722,
"step": 4908
},
{
"epoch": 0.31,
"grad_norm": 0.8604966998100281,
"learning_rate": 8.068506119354019e-06,
"loss": 0.566,
"step": 4909
},
{
"epoch": 0.31,
"grad_norm": 0.9307197332382202,
"learning_rate": 8.067695993397923e-06,
"loss": 0.6324,
"step": 4910
},
{
"epoch": 0.31,
"grad_norm": 0.8086503148078918,
"learning_rate": 8.066885738271821e-06,
"loss": 0.5555,
"step": 4911
},
{
"epoch": 0.31,
"grad_norm": 0.8632538914680481,
"learning_rate": 8.06607535400983e-06,
"loss": 0.5949,
"step": 4912
},
{
"epoch": 0.31,
"grad_norm": 0.893225908279419,
"learning_rate": 8.06526484064607e-06,
"loss": 0.5895,
"step": 4913
},
{
"epoch": 0.31,
"grad_norm": 0.9265469908714294,
"learning_rate": 8.064454198214673e-06,
"loss": 0.6288,
"step": 4914
},
{
"epoch": 0.31,
"grad_norm": 0.9373133778572083,
"learning_rate": 8.063643426749769e-06,
"loss": 0.6299,
"step": 4915
},
{
"epoch": 0.31,
"grad_norm": 0.9107393622398376,
"learning_rate": 8.062832526285498e-06,
"loss": 0.634,
"step": 4916
},
{
"epoch": 0.31,
"grad_norm": 0.9622877836227417,
"learning_rate": 8.062021496856004e-06,
"loss": 0.6507,
"step": 4917
},
{
"epoch": 0.31,
"grad_norm": 0.9220041632652283,
"learning_rate": 8.061210338495437e-06,
"loss": 0.6477,
"step": 4918
},
{
"epoch": 0.31,
"grad_norm": 0.8224441409111023,
"learning_rate": 8.060399051237952e-06,
"loss": 0.65,
"step": 4919
},
{
"epoch": 0.31,
"grad_norm": 0.8881222605705261,
"learning_rate": 8.059587635117709e-06,
"loss": 0.5975,
"step": 4920
},
{
"epoch": 0.31,
"grad_norm": 0.8618130683898926,
"learning_rate": 8.058776090168874e-06,
"loss": 0.5906,
"step": 4921
},
{
"epoch": 0.31,
"grad_norm": 0.8412930369377136,
"learning_rate": 8.057964416425618e-06,
"loss": 0.5719,
"step": 4922
},
{
"epoch": 0.31,
"grad_norm": 0.9025030136108398,
"learning_rate": 8.05715261392212e-06,
"loss": 0.5403,
"step": 4923
},
{
"epoch": 0.31,
"grad_norm": 0.8365161418914795,
"learning_rate": 8.05634068269256e-06,
"loss": 0.5598,
"step": 4924
},
{
"epoch": 0.31,
"grad_norm": 0.8508699536323547,
"learning_rate": 8.055528622771124e-06,
"loss": 0.6019,
"step": 4925
},
{
"epoch": 0.31,
"grad_norm": 0.8714786171913147,
"learning_rate": 8.05471643419201e-06,
"loss": 0.5831,
"step": 4926
},
{
"epoch": 0.31,
"grad_norm": 0.9067984819412231,
"learning_rate": 8.053904116989413e-06,
"loss": 0.6098,
"step": 4927
},
{
"epoch": 0.31,
"grad_norm": 0.7827737927436829,
"learning_rate": 8.053091671197537e-06,
"loss": 0.5421,
"step": 4928
},
{
"epoch": 0.31,
"grad_norm": 0.8685954809188843,
"learning_rate": 8.052279096850591e-06,
"loss": 0.6027,
"step": 4929
},
{
"epoch": 0.31,
"grad_norm": 0.8910870552062988,
"learning_rate": 8.051466393982792e-06,
"loss": 0.6068,
"step": 4930
},
{
"epoch": 0.31,
"grad_norm": 0.8768137693405151,
"learning_rate": 8.050653562628356e-06,
"loss": 0.6151,
"step": 4931
},
{
"epoch": 0.31,
"grad_norm": 0.9031566977500916,
"learning_rate": 8.049840602821512e-06,
"loss": 0.6637,
"step": 4932
},
{
"epoch": 0.31,
"grad_norm": 1.0350744724273682,
"learning_rate": 8.04902751459649e-06,
"loss": 0.606,
"step": 4933
},
{
"epoch": 0.31,
"grad_norm": 0.848858118057251,
"learning_rate": 8.048214297987526e-06,
"loss": 0.5559,
"step": 4934
},
{
"epoch": 0.31,
"grad_norm": 0.8456379771232605,
"learning_rate": 8.047400953028863e-06,
"loss": 0.569,
"step": 4935
},
{
"epoch": 0.31,
"grad_norm": 0.8597366213798523,
"learning_rate": 8.046587479754746e-06,
"loss": 0.5696,
"step": 4936
},
{
"epoch": 0.31,
"grad_norm": 0.9166806936264038,
"learning_rate": 8.04577387819943e-06,
"loss": 0.659,
"step": 4937
},
{
"epoch": 0.31,
"grad_norm": 0.8950727581977844,
"learning_rate": 8.044960148397168e-06,
"loss": 0.6182,
"step": 4938
},
{
"epoch": 0.31,
"grad_norm": 0.9122840166091919,
"learning_rate": 8.04414629038223e-06,
"loss": 0.6245,
"step": 4939
},
{
"epoch": 0.31,
"grad_norm": 0.8276764750480652,
"learning_rate": 8.04333230418888e-06,
"loss": 0.5669,
"step": 4940
},
{
"epoch": 0.31,
"grad_norm": 0.9038193821907043,
"learning_rate": 8.042518189851394e-06,
"loss": 0.5997,
"step": 4941
},
{
"epoch": 0.31,
"grad_norm": 0.894939124584198,
"learning_rate": 8.04170394740405e-06,
"loss": 0.6326,
"step": 4942
},
{
"epoch": 0.31,
"grad_norm": 0.8787042498588562,
"learning_rate": 8.040889576881136e-06,
"loss": 0.6027,
"step": 4943
},
{
"epoch": 0.31,
"grad_norm": 0.8947983980178833,
"learning_rate": 8.04007507831694e-06,
"loss": 0.6459,
"step": 4944
},
{
"epoch": 0.31,
"grad_norm": 0.8807556629180908,
"learning_rate": 8.039260451745758e-06,
"loss": 0.5848,
"step": 4945
},
{
"epoch": 0.31,
"grad_norm": 0.8896877765655518,
"learning_rate": 8.03844569720189e-06,
"loss": 0.6028,
"step": 4946
},
{
"epoch": 0.31,
"grad_norm": 0.872142493724823,
"learning_rate": 8.037630814719644e-06,
"loss": 0.5868,
"step": 4947
},
{
"epoch": 0.31,
"grad_norm": 0.808314859867096,
"learning_rate": 8.036815804333334e-06,
"loss": 0.5896,
"step": 4948
},
{
"epoch": 0.31,
"grad_norm": 0.8907493948936462,
"learning_rate": 8.036000666077273e-06,
"loss": 0.606,
"step": 4949
},
{
"epoch": 0.31,
"grad_norm": 0.8417367339134216,
"learning_rate": 8.035185399985784e-06,
"loss": 0.5841,
"step": 4950
},
{
"epoch": 0.31,
"grad_norm": 0.8830011487007141,
"learning_rate": 8.034370006093198e-06,
"loss": 0.6737,
"step": 4951
},
{
"epoch": 0.31,
"grad_norm": 1.0002917051315308,
"learning_rate": 8.033554484433848e-06,
"loss": 0.587,
"step": 4952
},
{
"epoch": 0.31,
"grad_norm": 0.9197138547897339,
"learning_rate": 8.032738835042068e-06,
"loss": 0.6633,
"step": 4953
},
{
"epoch": 0.31,
"grad_norm": 0.889056384563446,
"learning_rate": 8.031923057952208e-06,
"loss": 0.5941,
"step": 4954
},
{
"epoch": 0.31,
"grad_norm": 0.8778578042984009,
"learning_rate": 8.031107153198617e-06,
"loss": 0.6132,
"step": 4955
},
{
"epoch": 0.31,
"grad_norm": 0.925252377986908,
"learning_rate": 8.030291120815647e-06,
"loss": 0.6115,
"step": 4956
},
{
"epoch": 0.31,
"grad_norm": 0.9050502777099609,
"learning_rate": 8.029474960837657e-06,
"loss": 0.605,
"step": 4957
},
{
"epoch": 0.31,
"grad_norm": 0.8806825280189514,
"learning_rate": 8.028658673299019e-06,
"loss": 0.6294,
"step": 4958
},
{
"epoch": 0.31,
"grad_norm": 0.8301826119422913,
"learning_rate": 8.027842258234097e-06,
"loss": 0.5667,
"step": 4959
},
{
"epoch": 0.31,
"grad_norm": 0.9355791211128235,
"learning_rate": 8.027025715677273e-06,
"loss": 0.6805,
"step": 4960
},
{
"epoch": 0.31,
"grad_norm": 0.9568033814430237,
"learning_rate": 8.026209045662925e-06,
"loss": 0.6433,
"step": 4961
},
{
"epoch": 0.31,
"grad_norm": 0.8783117532730103,
"learning_rate": 8.025392248225444e-06,
"loss": 0.581,
"step": 4962
},
{
"epoch": 0.31,
"grad_norm": 0.8595120906829834,
"learning_rate": 8.024575323399217e-06,
"loss": 0.5877,
"step": 4963
},
{
"epoch": 0.31,
"grad_norm": 0.9189950823783875,
"learning_rate": 8.023758271218646e-06,
"loss": 0.6188,
"step": 4964
},
{
"epoch": 0.31,
"grad_norm": 0.8918676376342773,
"learning_rate": 8.022941091718133e-06,
"loss": 0.6064,
"step": 4965
},
{
"epoch": 0.31,
"grad_norm": 0.853366494178772,
"learning_rate": 8.022123784932085e-06,
"loss": 0.6376,
"step": 4966
},
{
"epoch": 0.31,
"grad_norm": 0.9495976567268372,
"learning_rate": 8.02130635089492e-06,
"loss": 0.6117,
"step": 4967
},
{
"epoch": 0.31,
"grad_norm": 0.9444292187690735,
"learning_rate": 8.020488789641054e-06,
"loss": 0.6688,
"step": 4968
},
{
"epoch": 0.31,
"grad_norm": 0.8615371584892273,
"learning_rate": 8.019671101204914e-06,
"loss": 0.5812,
"step": 4969
},
{
"epoch": 0.31,
"grad_norm": 0.8364808559417725,
"learning_rate": 8.018853285620926e-06,
"loss": 0.5657,
"step": 4970
},
{
"epoch": 0.31,
"grad_norm": 0.9400182962417603,
"learning_rate": 8.018035342923529e-06,
"loss": 0.6043,
"step": 4971
},
{
"epoch": 0.32,
"grad_norm": 0.8639470338821411,
"learning_rate": 8.017217273147165e-06,
"loss": 0.5418,
"step": 4972
},
{
"epoch": 0.32,
"grad_norm": 0.8635435104370117,
"learning_rate": 8.016399076326275e-06,
"loss": 0.6752,
"step": 4973
},
{
"epoch": 0.32,
"grad_norm": 0.974575400352478,
"learning_rate": 8.015580752495314e-06,
"loss": 0.6424,
"step": 4974
},
{
"epoch": 0.32,
"grad_norm": 0.9337494969367981,
"learning_rate": 8.014762301688737e-06,
"loss": 0.6497,
"step": 4975
},
{
"epoch": 0.32,
"grad_norm": 0.9488426446914673,
"learning_rate": 8.013943723941009e-06,
"loss": 0.5005,
"step": 4976
},
{
"epoch": 0.32,
"grad_norm": 0.8811922073364258,
"learning_rate": 8.013125019286594e-06,
"loss": 0.6492,
"step": 4977
},
{
"epoch": 0.32,
"grad_norm": 0.8622782230377197,
"learning_rate": 8.012306187759966e-06,
"loss": 0.6216,
"step": 4978
},
{
"epoch": 0.32,
"grad_norm": 0.9642921686172485,
"learning_rate": 8.011487229395605e-06,
"loss": 0.6454,
"step": 4979
},
{
"epoch": 0.32,
"grad_norm": 0.8489444851875305,
"learning_rate": 8.010668144227991e-06,
"loss": 0.6024,
"step": 4980
},
{
"epoch": 0.32,
"grad_norm": 0.9179771542549133,
"learning_rate": 8.009848932291617e-06,
"loss": 0.5687,
"step": 4981
},
{
"epoch": 0.32,
"grad_norm": 0.8957446813583374,
"learning_rate": 8.009029593620974e-06,
"loss": 0.5745,
"step": 4982
},
{
"epoch": 0.32,
"grad_norm": 0.9170886874198914,
"learning_rate": 8.008210128250563e-06,
"loss": 0.6323,
"step": 4983
},
{
"epoch": 0.32,
"grad_norm": 0.8754706382751465,
"learning_rate": 8.007390536214888e-06,
"loss": 0.6169,
"step": 4984
},
{
"epoch": 0.32,
"grad_norm": 0.8331484794616699,
"learning_rate": 8.006570817548457e-06,
"loss": 0.5864,
"step": 4985
},
{
"epoch": 0.32,
"grad_norm": 0.9085079431533813,
"learning_rate": 8.005750972285793e-06,
"loss": 0.6391,
"step": 4986
},
{
"epoch": 0.32,
"grad_norm": 0.8748310208320618,
"learning_rate": 8.004931000461408e-06,
"loss": 0.5741,
"step": 4987
},
{
"epoch": 0.32,
"grad_norm": 0.8841165900230408,
"learning_rate": 8.004110902109832e-06,
"loss": 0.6129,
"step": 4988
},
{
"epoch": 0.32,
"grad_norm": 0.88663649559021,
"learning_rate": 8.003290677265599e-06,
"loss": 0.6556,
"step": 4989
},
{
"epoch": 0.32,
"grad_norm": 0.8917930126190186,
"learning_rate": 8.002470325963241e-06,
"loss": 0.606,
"step": 4990
},
{
"epoch": 0.32,
"grad_norm": 0.7967976331710815,
"learning_rate": 8.001649848237303e-06,
"loss": 0.5492,
"step": 4991
},
{
"epoch": 0.32,
"grad_norm": 0.8872556090354919,
"learning_rate": 8.000829244122333e-06,
"loss": 0.6114,
"step": 4992
},
{
"epoch": 0.32,
"grad_norm": 1.745118498802185,
"learning_rate": 8.00000851365288e-06,
"loss": 0.6433,
"step": 4993
},
{
"epoch": 0.32,
"grad_norm": 0.8213765621185303,
"learning_rate": 7.999187656863507e-06,
"loss": 0.5746,
"step": 4994
},
{
"epoch": 0.32,
"grad_norm": 0.8834403157234192,
"learning_rate": 7.998366673788775e-06,
"loss": 0.5745,
"step": 4995
},
{
"epoch": 0.32,
"grad_norm": 0.9179670214653015,
"learning_rate": 7.997545564463251e-06,
"loss": 0.641,
"step": 4996
},
{
"epoch": 0.32,
"grad_norm": 0.9359582662582397,
"learning_rate": 7.996724328921514e-06,
"loss": 0.5567,
"step": 4997
},
{
"epoch": 0.32,
"grad_norm": 0.9402004480361938,
"learning_rate": 7.99590296719814e-06,
"loss": 0.638,
"step": 4998
},
{
"epoch": 0.32,
"grad_norm": 0.8939769268035889,
"learning_rate": 7.995081479327712e-06,
"loss": 0.6216,
"step": 4999
},
{
"epoch": 0.32,
"grad_norm": 0.9075430035591125,
"learning_rate": 7.994259865344822e-06,
"loss": 0.6213,
"step": 5000
},
{
"epoch": 0.32,
"grad_norm": 0.9165618419647217,
"learning_rate": 7.993438125284068e-06,
"loss": 0.6449,
"step": 5001
},
{
"epoch": 0.32,
"grad_norm": 0.9765704870223999,
"learning_rate": 7.992616259180045e-06,
"loss": 0.615,
"step": 5002
},
{
"epoch": 0.32,
"grad_norm": 0.9137974977493286,
"learning_rate": 7.991794267067363e-06,
"loss": 0.6025,
"step": 5003
},
{
"epoch": 0.32,
"grad_norm": 0.8844775557518005,
"learning_rate": 7.99097214898063e-06,
"loss": 0.6017,
"step": 5004
},
{
"epoch": 0.32,
"grad_norm": 0.9296790361404419,
"learning_rate": 7.99014990495447e-06,
"loss": 0.5938,
"step": 5005
},
{
"epoch": 0.32,
"grad_norm": 0.8395243287086487,
"learning_rate": 7.989327535023495e-06,
"loss": 0.6087,
"step": 5006
},
{
"epoch": 0.32,
"grad_norm": 0.9407158493995667,
"learning_rate": 7.988505039222339e-06,
"loss": 0.6039,
"step": 5007
},
{
"epoch": 0.32,
"grad_norm": 0.9456518292427063,
"learning_rate": 7.987682417585629e-06,
"loss": 0.6272,
"step": 5008
},
{
"epoch": 0.32,
"grad_norm": 0.9036068916320801,
"learning_rate": 7.98685967014801e-06,
"loss": 0.6433,
"step": 5009
},
{
"epoch": 0.32,
"grad_norm": 0.9327660202980042,
"learning_rate": 7.986036796944116e-06,
"loss": 0.6196,
"step": 5010
},
{
"epoch": 0.32,
"grad_norm": 0.9042969346046448,
"learning_rate": 7.985213798008605e-06,
"loss": 0.6259,
"step": 5011
},
{
"epoch": 0.32,
"grad_norm": 0.8693029880523682,
"learning_rate": 7.984390673376123e-06,
"loss": 0.6165,
"step": 5012
},
{
"epoch": 0.32,
"grad_norm": 0.8812036514282227,
"learning_rate": 7.983567423081331e-06,
"loss": 0.6037,
"step": 5013
},
{
"epoch": 0.32,
"grad_norm": 0.8530508279800415,
"learning_rate": 7.982744047158897e-06,
"loss": 0.6234,
"step": 5014
},
{
"epoch": 0.32,
"grad_norm": 0.9156954884529114,
"learning_rate": 7.981920545643485e-06,
"loss": 0.5921,
"step": 5015
},
{
"epoch": 0.32,
"grad_norm": 0.849946141242981,
"learning_rate": 7.981096918569773e-06,
"loss": 0.5624,
"step": 5016
},
{
"epoch": 0.32,
"grad_norm": 0.9375457763671875,
"learning_rate": 7.980273165972438e-06,
"loss": 0.587,
"step": 5017
},
{
"epoch": 0.32,
"grad_norm": 0.8364583253860474,
"learning_rate": 7.979449287886171e-06,
"loss": 0.5506,
"step": 5018
},
{
"epoch": 0.32,
"grad_norm": 0.8714501857757568,
"learning_rate": 7.978625284345657e-06,
"loss": 0.6281,
"step": 5019
},
{
"epoch": 0.32,
"grad_norm": 0.8554301857948303,
"learning_rate": 7.977801155385595e-06,
"loss": 0.5985,
"step": 5020
},
{
"epoch": 0.32,
"grad_norm": 0.9513722658157349,
"learning_rate": 7.976976901040686e-06,
"loss": 0.6487,
"step": 5021
},
{
"epoch": 0.32,
"grad_norm": 0.9231401085853577,
"learning_rate": 7.976152521345635e-06,
"loss": 0.6764,
"step": 5022
},
{
"epoch": 0.32,
"grad_norm": 0.9738418459892273,
"learning_rate": 7.975328016335154e-06,
"loss": 0.6647,
"step": 5023
},
{
"epoch": 0.32,
"grad_norm": 0.8537105917930603,
"learning_rate": 7.974503386043961e-06,
"loss": 0.6126,
"step": 5024
},
{
"epoch": 0.32,
"grad_norm": 0.9720308780670166,
"learning_rate": 7.973678630506778e-06,
"loss": 0.6771,
"step": 5025
},
{
"epoch": 0.32,
"grad_norm": 0.8640322089195251,
"learning_rate": 7.972853749758334e-06,
"loss": 0.6203,
"step": 5026
},
{
"epoch": 0.32,
"grad_norm": 0.9116325378417969,
"learning_rate": 7.972028743833357e-06,
"loss": 0.6164,
"step": 5027
},
{
"epoch": 0.32,
"grad_norm": 0.8856568336486816,
"learning_rate": 7.971203612766591e-06,
"loss": 0.5796,
"step": 5028
},
{
"epoch": 0.32,
"grad_norm": 0.8518129587173462,
"learning_rate": 7.970378356592779e-06,
"loss": 0.6571,
"step": 5029
},
{
"epoch": 0.32,
"grad_norm": 0.8910609483718872,
"learning_rate": 7.969552975346664e-06,
"loss": 0.6005,
"step": 5030
},
{
"epoch": 0.32,
"grad_norm": 0.9186645746231079,
"learning_rate": 7.968727469063005e-06,
"loss": 0.565,
"step": 5031
},
{
"epoch": 0.32,
"grad_norm": 0.8877920508384705,
"learning_rate": 7.967901837776559e-06,
"loss": 0.6059,
"step": 5032
},
{
"epoch": 0.32,
"grad_norm": 0.8737941384315491,
"learning_rate": 7.967076081522091e-06,
"loss": 0.6355,
"step": 5033
},
{
"epoch": 0.32,
"grad_norm": 0.9942765831947327,
"learning_rate": 7.966250200334373e-06,
"loss": 0.7055,
"step": 5034
},
{
"epoch": 0.32,
"grad_norm": 0.8824638724327087,
"learning_rate": 7.965424194248176e-06,
"loss": 0.5895,
"step": 5035
},
{
"epoch": 0.32,
"grad_norm": 0.9163713455200195,
"learning_rate": 7.964598063298282e-06,
"loss": 0.6511,
"step": 5036
},
{
"epoch": 0.32,
"grad_norm": 0.8366975784301758,
"learning_rate": 7.963771807519477e-06,
"loss": 0.5519,
"step": 5037
},
{
"epoch": 0.32,
"grad_norm": 0.8883844614028931,
"learning_rate": 7.962945426946552e-06,
"loss": 0.5771,
"step": 5038
},
{
"epoch": 0.32,
"grad_norm": 0.862853467464447,
"learning_rate": 7.962118921614302e-06,
"loss": 0.625,
"step": 5039
},
{
"epoch": 0.32,
"grad_norm": 0.8657647967338562,
"learning_rate": 7.961292291557529e-06,
"loss": 0.5691,
"step": 5040
},
{
"epoch": 0.32,
"grad_norm": 0.9227752089500427,
"learning_rate": 7.960465536811039e-06,
"loss": 0.602,
"step": 5041
},
{
"epoch": 0.32,
"grad_norm": 0.8963826298713684,
"learning_rate": 7.959638657409643e-06,
"loss": 0.5922,
"step": 5042
},
{
"epoch": 0.32,
"grad_norm": 0.8346092104911804,
"learning_rate": 7.95881165338816e-06,
"loss": 0.5559,
"step": 5043
},
{
"epoch": 0.32,
"grad_norm": 0.9060384631156921,
"learning_rate": 7.957984524781413e-06,
"loss": 0.6217,
"step": 5044
},
{
"epoch": 0.32,
"grad_norm": 0.9173614978790283,
"learning_rate": 7.957157271624225e-06,
"loss": 0.5922,
"step": 5045
},
{
"epoch": 0.32,
"grad_norm": 0.9467434883117676,
"learning_rate": 7.956329893951432e-06,
"loss": 0.6047,
"step": 5046
},
{
"epoch": 0.32,
"grad_norm": 1.0126010179519653,
"learning_rate": 7.95550239179787e-06,
"loss": 0.6554,
"step": 5047
},
{
"epoch": 0.32,
"grad_norm": 0.780703604221344,
"learning_rate": 7.954674765198386e-06,
"loss": 0.5616,
"step": 5048
},
{
"epoch": 0.32,
"grad_norm": 0.9652750492095947,
"learning_rate": 7.953847014187826e-06,
"loss": 0.6468,
"step": 5049
},
{
"epoch": 0.32,
"grad_norm": 0.9707852005958557,
"learning_rate": 7.953019138801045e-06,
"loss": 0.6298,
"step": 5050
},
{
"epoch": 0.32,
"grad_norm": 0.9064115881919861,
"learning_rate": 7.952191139072898e-06,
"loss": 0.6399,
"step": 5051
},
{
"epoch": 0.32,
"grad_norm": 0.9037145972251892,
"learning_rate": 7.951363015038254e-06,
"loss": 0.5806,
"step": 5052
},
{
"epoch": 0.32,
"grad_norm": 0.9301207065582275,
"learning_rate": 7.950534766731982e-06,
"loss": 0.6627,
"step": 5053
},
{
"epoch": 0.32,
"grad_norm": 0.8569024205207825,
"learning_rate": 7.949706394188951e-06,
"loss": 0.603,
"step": 5054
},
{
"epoch": 0.32,
"grad_norm": 0.9237979054450989,
"learning_rate": 7.948877897444047e-06,
"loss": 0.6087,
"step": 5055
},
{
"epoch": 0.32,
"grad_norm": 0.9367351531982422,
"learning_rate": 7.948049276532156e-06,
"loss": 0.6403,
"step": 5056
},
{
"epoch": 0.32,
"grad_norm": 0.8342140913009644,
"learning_rate": 7.94722053148816e-06,
"loss": 0.5697,
"step": 5057
},
{
"epoch": 0.32,
"grad_norm": 0.8935142755508423,
"learning_rate": 7.946391662346964e-06,
"loss": 0.6579,
"step": 5058
},
{
"epoch": 0.32,
"grad_norm": 0.9436396360397339,
"learning_rate": 7.945562669143463e-06,
"loss": 0.6328,
"step": 5059
},
{
"epoch": 0.32,
"grad_norm": 0.8714977502822876,
"learning_rate": 7.944733551912566e-06,
"loss": 0.5887,
"step": 5060
},
{
"epoch": 0.32,
"grad_norm": 0.8785292506217957,
"learning_rate": 7.943904310689184e-06,
"loss": 0.5927,
"step": 5061
},
{
"epoch": 0.32,
"grad_norm": 0.8961544036865234,
"learning_rate": 7.94307494550823e-06,
"loss": 0.6451,
"step": 5062
},
{
"epoch": 0.32,
"grad_norm": 0.9154882431030273,
"learning_rate": 7.94224545640463e-06,
"loss": 0.6019,
"step": 5063
},
{
"epoch": 0.32,
"grad_norm": 0.8385921716690063,
"learning_rate": 7.941415843413309e-06,
"loss": 0.5952,
"step": 5064
},
{
"epoch": 0.32,
"grad_norm": 0.8181779980659485,
"learning_rate": 7.940586106569198e-06,
"loss": 0.5941,
"step": 5065
},
{
"epoch": 0.32,
"grad_norm": 0.8897058367729187,
"learning_rate": 7.939756245907237e-06,
"loss": 0.6189,
"step": 5066
},
{
"epoch": 0.32,
"grad_norm": 0.9226515293121338,
"learning_rate": 7.938926261462366e-06,
"loss": 0.6463,
"step": 5067
},
{
"epoch": 0.32,
"grad_norm": 0.9354571104049683,
"learning_rate": 7.938096153269535e-06,
"loss": 0.6086,
"step": 5068
},
{
"epoch": 0.32,
"grad_norm": 0.8967651128768921,
"learning_rate": 7.937265921363695e-06,
"loss": 0.5475,
"step": 5069
},
{
"epoch": 0.32,
"grad_norm": 0.9139410853385925,
"learning_rate": 7.936435565779806e-06,
"loss": 0.5889,
"step": 5070
},
{
"epoch": 0.32,
"grad_norm": 0.894964337348938,
"learning_rate": 7.93560508655283e-06,
"loss": 0.59,
"step": 5071
},
{
"epoch": 0.32,
"grad_norm": 0.8929742574691772,
"learning_rate": 7.934774483717736e-06,
"loss": 0.5761,
"step": 5072
},
{
"epoch": 0.32,
"grad_norm": 0.8965078592300415,
"learning_rate": 7.933943757309498e-06,
"loss": 0.6356,
"step": 5073
},
{
"epoch": 0.32,
"grad_norm": 0.9059800505638123,
"learning_rate": 7.933112907363096e-06,
"loss": 0.5718,
"step": 5074
},
{
"epoch": 0.32,
"grad_norm": 0.8989181518554688,
"learning_rate": 7.93228193391351e-06,
"loss": 0.6147,
"step": 5075
},
{
"epoch": 0.32,
"grad_norm": 0.8238041400909424,
"learning_rate": 7.931450836995736e-06,
"loss": 0.5621,
"step": 5076
},
{
"epoch": 0.32,
"grad_norm": 0.8373918533325195,
"learning_rate": 7.930619616644761e-06,
"loss": 0.6033,
"step": 5077
},
{
"epoch": 0.32,
"grad_norm": 0.8767797946929932,
"learning_rate": 7.929788272895591e-06,
"loss": 0.6104,
"step": 5078
},
{
"epoch": 0.32,
"grad_norm": 0.9680573344230652,
"learning_rate": 7.928956805783228e-06,
"loss": 0.6186,
"step": 5079
},
{
"epoch": 0.32,
"grad_norm": 0.9051882028579712,
"learning_rate": 7.928125215342685e-06,
"loss": 0.6336,
"step": 5080
},
{
"epoch": 0.32,
"grad_norm": 0.9240115284919739,
"learning_rate": 7.927293501608975e-06,
"loss": 0.6207,
"step": 5081
},
{
"epoch": 0.32,
"grad_norm": 0.8769848346710205,
"learning_rate": 7.926461664617117e-06,
"loss": 0.6018,
"step": 5082
},
{
"epoch": 0.32,
"grad_norm": 0.7785282135009766,
"learning_rate": 7.92562970440214e-06,
"loss": 0.5859,
"step": 5083
},
{
"epoch": 0.32,
"grad_norm": 0.851161003112793,
"learning_rate": 7.924797620999074e-06,
"loss": 0.5716,
"step": 5084
},
{
"epoch": 0.32,
"grad_norm": 0.9216321706771851,
"learning_rate": 7.923965414442953e-06,
"loss": 0.6521,
"step": 5085
},
{
"epoch": 0.32,
"grad_norm": 0.9329628944396973,
"learning_rate": 7.923133084768822e-06,
"loss": 0.6118,
"step": 5086
},
{
"epoch": 0.32,
"grad_norm": 0.9301400184631348,
"learning_rate": 7.922300632011726e-06,
"loss": 0.6287,
"step": 5087
},
{
"epoch": 0.32,
"grad_norm": 0.8702458739280701,
"learning_rate": 7.921468056206715e-06,
"loss": 0.6279,
"step": 5088
},
{
"epoch": 0.32,
"grad_norm": 0.9146727323532104,
"learning_rate": 7.920635357388848e-06,
"loss": 0.5391,
"step": 5089
},
{
"epoch": 0.32,
"grad_norm": 0.8490307927131653,
"learning_rate": 7.919802535593185e-06,
"loss": 0.6225,
"step": 5090
},
{
"epoch": 0.32,
"grad_norm": 0.8508750796318054,
"learning_rate": 7.918969590854797e-06,
"loss": 0.6332,
"step": 5091
},
{
"epoch": 0.32,
"grad_norm": 0.8569998145103455,
"learning_rate": 7.91813652320875e-06,
"loss": 0.5871,
"step": 5092
},
{
"epoch": 0.32,
"grad_norm": 0.8754677176475525,
"learning_rate": 7.91730333269013e-06,
"loss": 0.6172,
"step": 5093
},
{
"epoch": 0.32,
"grad_norm": 0.9354038834571838,
"learning_rate": 7.916470019334012e-06,
"loss": 0.6477,
"step": 5094
},
{
"epoch": 0.32,
"grad_norm": 0.8642258048057556,
"learning_rate": 7.915636583175489e-06,
"loss": 0.5596,
"step": 5095
},
{
"epoch": 0.32,
"grad_norm": 0.9272780418395996,
"learning_rate": 7.91480302424965e-06,
"loss": 0.6006,
"step": 5096
},
{
"epoch": 0.32,
"grad_norm": 0.8797223567962646,
"learning_rate": 7.913969342591597e-06,
"loss": 0.5884,
"step": 5097
},
{
"epoch": 0.32,
"grad_norm": 0.8903371691703796,
"learning_rate": 7.913135538236432e-06,
"loss": 0.6636,
"step": 5098
},
{
"epoch": 0.32,
"grad_norm": 0.9428971409797668,
"learning_rate": 7.912301611219264e-06,
"loss": 0.5976,
"step": 5099
},
{
"epoch": 0.32,
"grad_norm": 0.8819142580032349,
"learning_rate": 7.911467561575204e-06,
"loss": 0.6289,
"step": 5100
},
{
"epoch": 0.32,
"grad_norm": 0.9539601802825928,
"learning_rate": 7.910633389339376e-06,
"loss": 0.6471,
"step": 5101
},
{
"epoch": 0.32,
"grad_norm": 0.816605806350708,
"learning_rate": 7.909799094546899e-06,
"loss": 0.5497,
"step": 5102
},
{
"epoch": 0.32,
"grad_norm": 0.8792059421539307,
"learning_rate": 7.908964677232906e-06,
"loss": 0.5623,
"step": 5103
},
{
"epoch": 0.32,
"grad_norm": 0.9570422172546387,
"learning_rate": 7.90813013743253e-06,
"loss": 0.628,
"step": 5104
},
{
"epoch": 0.32,
"grad_norm": 0.8918935060501099,
"learning_rate": 7.90729547518091e-06,
"loss": 0.6153,
"step": 5105
},
{
"epoch": 0.32,
"grad_norm": 0.9165834784507751,
"learning_rate": 7.906460690513192e-06,
"loss": 0.5937,
"step": 5106
},
{
"epoch": 0.32,
"grad_norm": 0.9291167259216309,
"learning_rate": 7.905625783464525e-06,
"loss": 0.6248,
"step": 5107
},
{
"epoch": 0.32,
"grad_norm": 0.8594471216201782,
"learning_rate": 7.904790754070063e-06,
"loss": 0.5916,
"step": 5108
},
{
"epoch": 0.32,
"grad_norm": 1.1345970630645752,
"learning_rate": 7.90395560236497e-06,
"loss": 0.5552,
"step": 5109
},
{
"epoch": 0.32,
"grad_norm": 0.8997986912727356,
"learning_rate": 7.903120328384406e-06,
"loss": 0.5698,
"step": 5110
},
{
"epoch": 0.32,
"grad_norm": 0.8802492618560791,
"learning_rate": 7.902284932163545e-06,
"loss": 0.5791,
"step": 5111
},
{
"epoch": 0.32,
"grad_norm": 0.8879519701004028,
"learning_rate": 7.901449413737562e-06,
"loss": 0.6044,
"step": 5112
},
{
"epoch": 0.32,
"grad_norm": 0.8550997972488403,
"learning_rate": 7.90061377314164e-06,
"loss": 0.6232,
"step": 5113
},
{
"epoch": 0.32,
"grad_norm": 0.8434523940086365,
"learning_rate": 7.899778010410958e-06,
"loss": 0.608,
"step": 5114
},
{
"epoch": 0.32,
"grad_norm": 0.8279407024383545,
"learning_rate": 7.898942125580715e-06,
"loss": 0.5741,
"step": 5115
},
{
"epoch": 0.32,
"grad_norm": 0.8244683742523193,
"learning_rate": 7.898106118686102e-06,
"loss": 0.5697,
"step": 5116
},
{
"epoch": 0.32,
"grad_norm": 0.9236017465591431,
"learning_rate": 7.897269989762322e-06,
"loss": 0.6289,
"step": 5117
},
{
"epoch": 0.32,
"grad_norm": 0.921940803527832,
"learning_rate": 7.896433738844583e-06,
"loss": 0.5958,
"step": 5118
},
{
"epoch": 0.32,
"grad_norm": 0.8436870574951172,
"learning_rate": 7.895597365968093e-06,
"loss": 0.6159,
"step": 5119
},
{
"epoch": 0.32,
"grad_norm": 0.8956601619720459,
"learning_rate": 7.894760871168074e-06,
"loss": 0.6182,
"step": 5120
},
{
"epoch": 0.32,
"grad_norm": 0.9135102033615112,
"learning_rate": 7.893924254479744e-06,
"loss": 0.6239,
"step": 5121
},
{
"epoch": 0.32,
"grad_norm": 0.9187552332878113,
"learning_rate": 7.893087515938329e-06,
"loss": 0.6376,
"step": 5122
},
{
"epoch": 0.32,
"grad_norm": 0.9064997434616089,
"learning_rate": 7.892250655579063e-06,
"loss": 0.6092,
"step": 5123
},
{
"epoch": 0.32,
"grad_norm": 0.9091038107872009,
"learning_rate": 7.891413673437185e-06,
"loss": 0.5968,
"step": 5124
},
{
"epoch": 0.32,
"grad_norm": 0.9700572490692139,
"learning_rate": 7.890576569547937e-06,
"loss": 0.6382,
"step": 5125
},
{
"epoch": 0.32,
"grad_norm": 0.8647416830062866,
"learning_rate": 7.889739343946561e-06,
"loss": 0.646,
"step": 5126
},
{
"epoch": 0.32,
"grad_norm": 0.8427348732948303,
"learning_rate": 7.888901996668317e-06,
"loss": 0.5889,
"step": 5127
},
{
"epoch": 0.32,
"grad_norm": 0.8934534192085266,
"learning_rate": 7.888064527748458e-06,
"loss": 0.6227,
"step": 5128
},
{
"epoch": 0.32,
"grad_norm": 0.9173450469970703,
"learning_rate": 7.887226937222252e-06,
"loss": 0.6438,
"step": 5129
},
{
"epoch": 0.33,
"grad_norm": 0.8801364898681641,
"learning_rate": 7.88638922512496e-06,
"loss": 0.6402,
"step": 5130
},
{
"epoch": 0.33,
"grad_norm": 0.9062999486923218,
"learning_rate": 7.88555139149186e-06,
"loss": 0.6434,
"step": 5131
},
{
"epoch": 0.33,
"grad_norm": 0.9464401602745056,
"learning_rate": 7.884713436358228e-06,
"loss": 0.5997,
"step": 5132
},
{
"epoch": 0.33,
"grad_norm": 0.889105498790741,
"learning_rate": 7.883875359759349e-06,
"loss": 0.6423,
"step": 5133
},
{
"epoch": 0.33,
"grad_norm": 0.9038829803466797,
"learning_rate": 7.883037161730511e-06,
"loss": 0.6008,
"step": 5134
},
{
"epoch": 0.33,
"grad_norm": 0.8674249649047852,
"learning_rate": 7.882198842307008e-06,
"loss": 0.605,
"step": 5135
},
{
"epoch": 0.33,
"grad_norm": 0.9415613412857056,
"learning_rate": 7.881360401524138e-06,
"loss": 0.6118,
"step": 5136
},
{
"epoch": 0.33,
"grad_norm": 0.9558926820755005,
"learning_rate": 7.880521839417206e-06,
"loss": 0.5798,
"step": 5137
},
{
"epoch": 0.33,
"grad_norm": 0.8905767798423767,
"learning_rate": 7.879683156021518e-06,
"loss": 0.611,
"step": 5138
},
{
"epoch": 0.33,
"grad_norm": 0.9160702228546143,
"learning_rate": 7.87884435137239e-06,
"loss": 0.6068,
"step": 5139
},
{
"epoch": 0.33,
"grad_norm": 0.8442513942718506,
"learning_rate": 7.878005425505143e-06,
"loss": 0.5846,
"step": 5140
},
{
"epoch": 0.33,
"grad_norm": 0.8762052059173584,
"learning_rate": 7.877166378455098e-06,
"loss": 0.6208,
"step": 5141
},
{
"epoch": 0.33,
"grad_norm": 0.934578537940979,
"learning_rate": 7.876327210257586e-06,
"loss": 0.6083,
"step": 5142
},
{
"epoch": 0.33,
"grad_norm": 0.899614155292511,
"learning_rate": 7.875487920947941e-06,
"loss": 0.6371,
"step": 5143
},
{
"epoch": 0.33,
"grad_norm": 0.8895543217658997,
"learning_rate": 7.874648510561503e-06,
"loss": 0.6393,
"step": 5144
},
{
"epoch": 0.33,
"grad_norm": 0.8901795744895935,
"learning_rate": 7.873808979133616e-06,
"loss": 0.6394,
"step": 5145
},
{
"epoch": 0.33,
"grad_norm": 0.8974289894104004,
"learning_rate": 7.872969326699631e-06,
"loss": 0.5565,
"step": 5146
},
{
"epoch": 0.33,
"grad_norm": 0.9629907608032227,
"learning_rate": 7.8721295532949e-06,
"loss": 0.6353,
"step": 5147
},
{
"epoch": 0.33,
"grad_norm": 0.91104656457901,
"learning_rate": 7.871289658954789e-06,
"loss": 0.6165,
"step": 5148
},
{
"epoch": 0.33,
"grad_norm": 0.9044172763824463,
"learning_rate": 7.870449643714654e-06,
"loss": 0.6053,
"step": 5149
},
{
"epoch": 0.33,
"grad_norm": 0.823835015296936,
"learning_rate": 7.869609507609874e-06,
"loss": 0.5482,
"step": 5150
},
{
"epoch": 0.33,
"grad_norm": 0.8716912865638733,
"learning_rate": 7.868769250675818e-06,
"loss": 0.6004,
"step": 5151
},
{
"epoch": 0.33,
"grad_norm": 0.8245472311973572,
"learning_rate": 7.867928872947869e-06,
"loss": 0.5591,
"step": 5152
},
{
"epoch": 0.33,
"grad_norm": 0.8959210515022278,
"learning_rate": 7.867088374461413e-06,
"loss": 0.6253,
"step": 5153
},
{
"epoch": 0.33,
"grad_norm": 0.8492079377174377,
"learning_rate": 7.866247755251838e-06,
"loss": 0.6169,
"step": 5154
},
{
"epoch": 0.33,
"grad_norm": 0.8312681317329407,
"learning_rate": 7.865407015354542e-06,
"loss": 0.5774,
"step": 5155
},
{
"epoch": 0.33,
"grad_norm": 0.9149585962295532,
"learning_rate": 7.864566154804925e-06,
"loss": 0.6262,
"step": 5156
},
{
"epoch": 0.33,
"grad_norm": 0.8946517705917358,
"learning_rate": 7.86372517363839e-06,
"loss": 0.563,
"step": 5157
},
{
"epoch": 0.33,
"grad_norm": 0.8930898904800415,
"learning_rate": 7.862884071890353e-06,
"loss": 0.6389,
"step": 5158
},
{
"epoch": 0.33,
"grad_norm": 0.8389832973480225,
"learning_rate": 7.862042849596225e-06,
"loss": 0.5617,
"step": 5159
},
{
"epoch": 0.33,
"grad_norm": 0.8541855216026306,
"learning_rate": 7.86120150679143e-06,
"loss": 0.6236,
"step": 5160
},
{
"epoch": 0.33,
"grad_norm": 0.8012550473213196,
"learning_rate": 7.860360043511392e-06,
"loss": 0.5796,
"step": 5161
},
{
"epoch": 0.33,
"grad_norm": 0.8516356348991394,
"learning_rate": 7.859518459791543e-06,
"loss": 0.6257,
"step": 5162
},
{
"epoch": 0.33,
"grad_norm": 0.8946587443351746,
"learning_rate": 7.85867675566732e-06,
"loss": 0.5812,
"step": 5163
},
{
"epoch": 0.33,
"grad_norm": 0.8651425838470459,
"learning_rate": 7.857834931174164e-06,
"loss": 0.6141,
"step": 5164
},
{
"epoch": 0.33,
"grad_norm": 0.8876100182533264,
"learning_rate": 7.85699298634752e-06,
"loss": 0.6099,
"step": 5165
},
{
"epoch": 0.33,
"grad_norm": 0.8882783055305481,
"learning_rate": 7.856150921222838e-06,
"loss": 0.5971,
"step": 5166
},
{
"epoch": 0.33,
"grad_norm": 0.862937867641449,
"learning_rate": 7.85530873583558e-06,
"loss": 0.6422,
"step": 5167
},
{
"epoch": 0.33,
"grad_norm": 0.8829284906387329,
"learning_rate": 7.854466430221203e-06,
"loss": 0.5815,
"step": 5168
},
{
"epoch": 0.33,
"grad_norm": 0.8932998776435852,
"learning_rate": 7.853624004415172e-06,
"loss": 0.6657,
"step": 5169
},
{
"epoch": 0.33,
"grad_norm": 0.8481628894805908,
"learning_rate": 7.852781458452964e-06,
"loss": 0.6036,
"step": 5170
},
{
"epoch": 0.33,
"grad_norm": 0.958634614944458,
"learning_rate": 7.851938792370053e-06,
"loss": 0.6527,
"step": 5171
},
{
"epoch": 0.33,
"grad_norm": 0.8003389239311218,
"learning_rate": 7.85109600620192e-06,
"loss": 0.5318,
"step": 5172
},
{
"epoch": 0.33,
"grad_norm": 0.922940731048584,
"learning_rate": 7.85025309998405e-06,
"loss": 0.5826,
"step": 5173
},
{
"epoch": 0.33,
"grad_norm": 0.8557353019714355,
"learning_rate": 7.849410073751942e-06,
"loss": 0.5537,
"step": 5174
},
{
"epoch": 0.33,
"grad_norm": 0.9081326723098755,
"learning_rate": 7.848566927541084e-06,
"loss": 0.5954,
"step": 5175
},
{
"epoch": 0.33,
"grad_norm": 0.8481424450874329,
"learning_rate": 7.847723661386985e-06,
"loss": 0.582,
"step": 5176
},
{
"epoch": 0.33,
"grad_norm": 0.9431670308113098,
"learning_rate": 7.846880275325149e-06,
"loss": 0.6132,
"step": 5177
},
{
"epoch": 0.33,
"grad_norm": 0.827930748462677,
"learning_rate": 7.846036769391086e-06,
"loss": 0.612,
"step": 5178
},
{
"epoch": 0.33,
"grad_norm": 0.8801954984664917,
"learning_rate": 7.845193143620316e-06,
"loss": 0.6171,
"step": 5179
},
{
"epoch": 0.33,
"grad_norm": 0.9372230768203735,
"learning_rate": 7.84434939804836e-06,
"loss": 0.636,
"step": 5180
},
{
"epoch": 0.33,
"grad_norm": 0.9458149075508118,
"learning_rate": 7.843505532710748e-06,
"loss": 0.6446,
"step": 5181
},
{
"epoch": 0.33,
"grad_norm": 0.8717585802078247,
"learning_rate": 7.84266154764301e-06,
"loss": 0.6349,
"step": 5182
},
{
"epoch": 0.33,
"grad_norm": 0.8793720602989197,
"learning_rate": 7.84181744288068e-06,
"loss": 0.6381,
"step": 5183
},
{
"epoch": 0.33,
"grad_norm": 0.8223835229873657,
"learning_rate": 7.840973218459305e-06,
"loss": 0.5489,
"step": 5184
},
{
"epoch": 0.33,
"grad_norm": 0.9283150434494019,
"learning_rate": 7.84012887441443e-06,
"loss": 0.6059,
"step": 5185
},
{
"epoch": 0.33,
"grad_norm": 0.9703242778778076,
"learning_rate": 7.839284410781609e-06,
"loss": 0.5753,
"step": 5186
},
{
"epoch": 0.33,
"grad_norm": 0.8721915483474731,
"learning_rate": 7.838439827596398e-06,
"loss": 0.624,
"step": 5187
},
{
"epoch": 0.33,
"grad_norm": 0.9139184355735779,
"learning_rate": 7.83759512489436e-06,
"loss": 0.6585,
"step": 5188
},
{
"epoch": 0.33,
"grad_norm": 0.8563583493232727,
"learning_rate": 7.836750302711065e-06,
"loss": 0.5913,
"step": 5189
},
{
"epoch": 0.33,
"grad_norm": 0.8751399517059326,
"learning_rate": 7.83590536108208e-06,
"loss": 0.5832,
"step": 5190
},
{
"epoch": 0.33,
"grad_norm": 0.8799748420715332,
"learning_rate": 7.835060300042986e-06,
"loss": 0.6191,
"step": 5191
},
{
"epoch": 0.33,
"grad_norm": 0.8769707679748535,
"learning_rate": 7.834215119629366e-06,
"loss": 0.5834,
"step": 5192
},
{
"epoch": 0.33,
"grad_norm": 0.8527321219444275,
"learning_rate": 7.833369819876809e-06,
"loss": 0.6034,
"step": 5193
},
{
"epoch": 0.33,
"grad_norm": 0.9315845370292664,
"learning_rate": 7.832524400820902e-06,
"loss": 0.5899,
"step": 5194
},
{
"epoch": 0.33,
"grad_norm": 0.9396250247955322,
"learning_rate": 7.831678862497248e-06,
"loss": 0.6207,
"step": 5195
},
{
"epoch": 0.33,
"grad_norm": 0.9503593444824219,
"learning_rate": 7.830833204941446e-06,
"loss": 0.655,
"step": 5196
},
{
"epoch": 0.33,
"grad_norm": 0.8898603916168213,
"learning_rate": 7.829987428189108e-06,
"loss": 0.6509,
"step": 5197
},
{
"epoch": 0.33,
"grad_norm": 0.85368812084198,
"learning_rate": 7.829141532275843e-06,
"loss": 0.5851,
"step": 5198
},
{
"epoch": 0.33,
"grad_norm": 0.9276217222213745,
"learning_rate": 7.82829551723727e-06,
"loss": 0.6307,
"step": 5199
},
{
"epoch": 0.33,
"grad_norm": 0.8713779449462891,
"learning_rate": 7.827449383109012e-06,
"loss": 0.5739,
"step": 5200
},
{
"epoch": 0.33,
"grad_norm": 0.9299573302268982,
"learning_rate": 7.826603129926696e-06,
"loss": 0.6233,
"step": 5201
},
{
"epoch": 0.33,
"grad_norm": 0.8474642634391785,
"learning_rate": 7.825756757725956e-06,
"loss": 0.6163,
"step": 5202
},
{
"epoch": 0.33,
"grad_norm": 0.9293124079704285,
"learning_rate": 7.824910266542426e-06,
"loss": 0.6704,
"step": 5203
},
{
"epoch": 0.33,
"grad_norm": 0.8167198896408081,
"learning_rate": 7.824063656411756e-06,
"loss": 0.5296,
"step": 5204
},
{
"epoch": 0.33,
"grad_norm": 0.8058587908744812,
"learning_rate": 7.823216927369588e-06,
"loss": 0.5909,
"step": 5205
},
{
"epoch": 0.33,
"grad_norm": 0.9068382382392883,
"learning_rate": 7.822370079451576e-06,
"loss": 0.6536,
"step": 5206
},
{
"epoch": 0.33,
"grad_norm": 0.8225257396697998,
"learning_rate": 7.821523112693377e-06,
"loss": 0.5772,
"step": 5207
},
{
"epoch": 0.33,
"grad_norm": 0.8437464833259583,
"learning_rate": 7.820676027130657e-06,
"loss": 0.5401,
"step": 5208
},
{
"epoch": 0.33,
"grad_norm": 0.8533555865287781,
"learning_rate": 7.81982882279908e-06,
"loss": 0.5964,
"step": 5209
},
{
"epoch": 0.33,
"grad_norm": 0.8721039295196533,
"learning_rate": 7.818981499734323e-06,
"loss": 0.5896,
"step": 5210
},
{
"epoch": 0.33,
"grad_norm": 0.8488752841949463,
"learning_rate": 7.818134057972062e-06,
"loss": 0.5717,
"step": 5211
},
{
"epoch": 0.33,
"grad_norm": 0.8960286378860474,
"learning_rate": 7.817286497547977e-06,
"loss": 0.5905,
"step": 5212
},
{
"epoch": 0.33,
"grad_norm": 0.9279623627662659,
"learning_rate": 7.81643881849776e-06,
"loss": 0.6313,
"step": 5213
},
{
"epoch": 0.33,
"grad_norm": 0.8772743940353394,
"learning_rate": 7.815591020857101e-06,
"loss": 0.6305,
"step": 5214
},
{
"epoch": 0.33,
"grad_norm": 0.9260540008544922,
"learning_rate": 7.8147431046617e-06,
"loss": 0.5967,
"step": 5215
},
{
"epoch": 0.33,
"grad_norm": 0.9077113270759583,
"learning_rate": 7.813895069947257e-06,
"loss": 0.6454,
"step": 5216
},
{
"epoch": 0.33,
"grad_norm": 0.8971432447433472,
"learning_rate": 7.813046916749483e-06,
"loss": 0.6458,
"step": 5217
},
{
"epoch": 0.33,
"grad_norm": 0.9562937617301941,
"learning_rate": 7.812198645104088e-06,
"loss": 0.6051,
"step": 5218
},
{
"epoch": 0.33,
"grad_norm": 0.9105967879295349,
"learning_rate": 7.811350255046792e-06,
"loss": 0.5909,
"step": 5219
},
{
"epoch": 0.33,
"grad_norm": 0.890044629573822,
"learning_rate": 7.810501746613316e-06,
"loss": 0.5771,
"step": 5220
},
{
"epoch": 0.33,
"grad_norm": 0.9129796028137207,
"learning_rate": 7.809653119839389e-06,
"loss": 0.5991,
"step": 5221
},
{
"epoch": 0.33,
"grad_norm": 0.9497199654579163,
"learning_rate": 7.808804374760742e-06,
"loss": 0.6003,
"step": 5222
},
{
"epoch": 0.33,
"grad_norm": 0.9804506301879883,
"learning_rate": 7.807955511413114e-06,
"loss": 0.6147,
"step": 5223
},
{
"epoch": 0.33,
"grad_norm": 0.8824604749679565,
"learning_rate": 7.80710652983225e-06,
"loss": 0.5427,
"step": 5224
},
{
"epoch": 0.33,
"grad_norm": 0.9190927743911743,
"learning_rate": 7.806257430053893e-06,
"loss": 0.5981,
"step": 5225
},
{
"epoch": 0.33,
"grad_norm": 0.9122849702835083,
"learning_rate": 7.8054082121138e-06,
"loss": 0.6007,
"step": 5226
},
{
"epoch": 0.33,
"grad_norm": 0.8687419295310974,
"learning_rate": 7.804558876047724e-06,
"loss": 0.6064,
"step": 5227
},
{
"epoch": 0.33,
"grad_norm": 0.8574259281158447,
"learning_rate": 7.80370942189143e-06,
"loss": 0.5869,
"step": 5228
},
{
"epoch": 0.33,
"grad_norm": 0.9501886367797852,
"learning_rate": 7.802859849680686e-06,
"loss": 0.6068,
"step": 5229
},
{
"epoch": 0.33,
"grad_norm": 0.8507223725318909,
"learning_rate": 7.802010159451267e-06,
"loss": 0.6234,
"step": 5230
},
{
"epoch": 0.33,
"grad_norm": 0.9953079223632812,
"learning_rate": 7.801160351238945e-06,
"loss": 0.6017,
"step": 5231
},
{
"epoch": 0.33,
"grad_norm": 0.8714452385902405,
"learning_rate": 7.800310425079505e-06,
"loss": 0.5615,
"step": 5232
},
{
"epoch": 0.33,
"grad_norm": 0.9719001054763794,
"learning_rate": 7.799460381008736e-06,
"loss": 0.5711,
"step": 5233
},
{
"epoch": 0.33,
"grad_norm": 0.889895498752594,
"learning_rate": 7.798610219062428e-06,
"loss": 0.6251,
"step": 5234
},
{
"epoch": 0.33,
"grad_norm": 0.8774588108062744,
"learning_rate": 7.79775993927638e-06,
"loss": 0.5806,
"step": 5235
},
{
"epoch": 0.33,
"grad_norm": 0.8740803599357605,
"learning_rate": 7.796909541686392e-06,
"loss": 0.5344,
"step": 5236
},
{
"epoch": 0.33,
"grad_norm": 0.8627974987030029,
"learning_rate": 7.796059026328274e-06,
"loss": 0.6076,
"step": 5237
},
{
"epoch": 0.33,
"grad_norm": 0.858439564704895,
"learning_rate": 7.795208393237839e-06,
"loss": 0.5604,
"step": 5238
},
{
"epoch": 0.33,
"grad_norm": 0.8907666802406311,
"learning_rate": 7.794357642450899e-06,
"loss": 0.6255,
"step": 5239
},
{
"epoch": 0.33,
"grad_norm": 0.8581748008728027,
"learning_rate": 7.793506774003282e-06,
"loss": 0.6136,
"step": 5240
},
{
"epoch": 0.33,
"grad_norm": 0.9139533638954163,
"learning_rate": 7.792655787930811e-06,
"loss": 0.6128,
"step": 5241
},
{
"epoch": 0.33,
"grad_norm": 0.8762749433517456,
"learning_rate": 7.791804684269322e-06,
"loss": 0.6169,
"step": 5242
},
{
"epoch": 0.33,
"grad_norm": 0.8736821413040161,
"learning_rate": 7.790953463054647e-06,
"loss": 0.6287,
"step": 5243
},
{
"epoch": 0.33,
"grad_norm": 0.8526340126991272,
"learning_rate": 7.790102124322633e-06,
"loss": 0.5913,
"step": 5244
},
{
"epoch": 0.33,
"grad_norm": 0.9196691513061523,
"learning_rate": 7.789250668109124e-06,
"loss": 0.6574,
"step": 5245
},
{
"epoch": 0.33,
"grad_norm": 0.8942427039146423,
"learning_rate": 7.788399094449971e-06,
"loss": 0.6133,
"step": 5246
},
{
"epoch": 0.33,
"grad_norm": 0.8590309023857117,
"learning_rate": 7.787547403381033e-06,
"loss": 0.5746,
"step": 5247
},
{
"epoch": 0.33,
"grad_norm": 0.9016396403312683,
"learning_rate": 7.786695594938172e-06,
"loss": 0.5917,
"step": 5248
},
{
"epoch": 0.33,
"grad_norm": 0.9520177245140076,
"learning_rate": 7.785843669157253e-06,
"loss": 0.657,
"step": 5249
},
{
"epoch": 0.33,
"grad_norm": 0.9555111527442932,
"learning_rate": 7.784991626074148e-06,
"loss": 0.6724,
"step": 5250
},
{
"epoch": 0.33,
"grad_norm": 0.9693423509597778,
"learning_rate": 7.784139465724734e-06,
"loss": 0.6453,
"step": 5251
},
{
"epoch": 0.33,
"grad_norm": 0.8132855296134949,
"learning_rate": 7.783287188144893e-06,
"loss": 0.5865,
"step": 5252
},
{
"epoch": 0.33,
"grad_norm": 0.8263188004493713,
"learning_rate": 7.78243479337051e-06,
"loss": 0.6248,
"step": 5253
},
{
"epoch": 0.33,
"grad_norm": 0.8053151369094849,
"learning_rate": 7.781582281437479e-06,
"loss": 0.5827,
"step": 5254
},
{
"epoch": 0.33,
"grad_norm": 0.9075903296470642,
"learning_rate": 7.780729652381694e-06,
"loss": 0.6344,
"step": 5255
},
{
"epoch": 0.33,
"grad_norm": 0.8380961418151855,
"learning_rate": 7.779876906239055e-06,
"loss": 0.607,
"step": 5256
},
{
"epoch": 0.33,
"grad_norm": 0.892805814743042,
"learning_rate": 7.779024043045471e-06,
"loss": 0.6279,
"step": 5257
},
{
"epoch": 0.33,
"grad_norm": 0.9007843136787415,
"learning_rate": 7.778171062836853e-06,
"loss": 0.6653,
"step": 5258
},
{
"epoch": 0.33,
"grad_norm": 0.9166417717933655,
"learning_rate": 7.777317965649114e-06,
"loss": 0.632,
"step": 5259
},
{
"epoch": 0.33,
"grad_norm": 0.9331604838371277,
"learning_rate": 7.776464751518177e-06,
"loss": 0.6262,
"step": 5260
},
{
"epoch": 0.33,
"grad_norm": 0.8771944642066956,
"learning_rate": 7.775611420479971e-06,
"loss": 0.5521,
"step": 5261
},
{
"epoch": 0.33,
"grad_norm": 0.8667744398117065,
"learning_rate": 7.774757972570423e-06,
"loss": 0.5917,
"step": 5262
},
{
"epoch": 0.33,
"grad_norm": 0.9010536074638367,
"learning_rate": 7.773904407825467e-06,
"loss": 0.6459,
"step": 5263
},
{
"epoch": 0.33,
"grad_norm": 0.8958863615989685,
"learning_rate": 7.773050726281048e-06,
"loss": 0.5939,
"step": 5264
},
{
"epoch": 0.33,
"grad_norm": 0.9226192831993103,
"learning_rate": 7.772196927973109e-06,
"loss": 0.6127,
"step": 5265
},
{
"epoch": 0.33,
"grad_norm": 0.8647396564483643,
"learning_rate": 7.771343012937602e-06,
"loss": 0.6057,
"step": 5266
},
{
"epoch": 0.33,
"grad_norm": 0.9021638631820679,
"learning_rate": 7.77048898121048e-06,
"loss": 0.6024,
"step": 5267
},
{
"epoch": 0.33,
"grad_norm": 0.9035550355911255,
"learning_rate": 7.769634832827706e-06,
"loss": 0.577,
"step": 5268
},
{
"epoch": 0.33,
"grad_norm": 0.8818480968475342,
"learning_rate": 7.768780567825243e-06,
"loss": 0.5895,
"step": 5269
},
{
"epoch": 0.33,
"grad_norm": 0.8871473670005798,
"learning_rate": 7.767926186239064e-06,
"loss": 0.6386,
"step": 5270
},
{
"epoch": 0.33,
"grad_norm": 0.9286932945251465,
"learning_rate": 7.76707168810514e-06,
"loss": 0.6352,
"step": 5271
},
{
"epoch": 0.33,
"grad_norm": 0.8643122315406799,
"learning_rate": 7.766217073459454e-06,
"loss": 0.5854,
"step": 5272
},
{
"epoch": 0.33,
"grad_norm": 0.8689426183700562,
"learning_rate": 7.765362342337991e-06,
"loss": 0.6032,
"step": 5273
},
{
"epoch": 0.33,
"grad_norm": 0.8007031679153442,
"learning_rate": 7.76450749477674e-06,
"loss": 0.5664,
"step": 5274
},
{
"epoch": 0.33,
"grad_norm": 0.8409014940261841,
"learning_rate": 7.763652530811692e-06,
"loss": 0.5953,
"step": 5275
},
{
"epoch": 0.33,
"grad_norm": 0.8317943215370178,
"learning_rate": 7.762797450478853e-06,
"loss": 0.6057,
"step": 5276
},
{
"epoch": 0.33,
"grad_norm": 0.8628614544868469,
"learning_rate": 7.761942253814225e-06,
"loss": 0.6164,
"step": 5277
},
{
"epoch": 0.33,
"grad_norm": 0.87236487865448,
"learning_rate": 7.761086940853814e-06,
"loss": 0.5065,
"step": 5278
},
{
"epoch": 0.33,
"grad_norm": 0.9762303233146667,
"learning_rate": 7.76023151163364e-06,
"loss": 0.5775,
"step": 5279
},
{
"epoch": 0.33,
"grad_norm": 0.907646119594574,
"learning_rate": 7.759375966189718e-06,
"loss": 0.601,
"step": 5280
},
{
"epoch": 0.33,
"grad_norm": 0.9219939112663269,
"learning_rate": 7.758520304558072e-06,
"loss": 0.5912,
"step": 5281
},
{
"epoch": 0.33,
"grad_norm": 0.9645958542823792,
"learning_rate": 7.757664526774733e-06,
"loss": 0.6087,
"step": 5282
},
{
"epoch": 0.33,
"grad_norm": 0.9233863353729248,
"learning_rate": 7.756808632875737e-06,
"loss": 0.6331,
"step": 5283
},
{
"epoch": 0.33,
"grad_norm": 0.8966994285583496,
"learning_rate": 7.755952622897117e-06,
"loss": 0.5706,
"step": 5284
},
{
"epoch": 0.33,
"grad_norm": 0.9332131743431091,
"learning_rate": 7.755096496874918e-06,
"loss": 0.5962,
"step": 5285
},
{
"epoch": 0.33,
"grad_norm": 0.8440611958503723,
"learning_rate": 7.75424025484519e-06,
"loss": 0.6056,
"step": 5286
},
{
"epoch": 0.33,
"grad_norm": 0.9401943683624268,
"learning_rate": 7.753383896843988e-06,
"loss": 0.6501,
"step": 5287
},
{
"epoch": 0.34,
"grad_norm": 0.8421300053596497,
"learning_rate": 7.752527422907368e-06,
"loss": 0.5683,
"step": 5288
},
{
"epoch": 0.34,
"grad_norm": 0.8217456340789795,
"learning_rate": 7.751670833071393e-06,
"loss": 0.5881,
"step": 5289
},
{
"epoch": 0.34,
"grad_norm": 0.9123767018318176,
"learning_rate": 7.750814127372131e-06,
"loss": 0.5491,
"step": 5290
},
{
"epoch": 0.34,
"grad_norm": 0.875048816204071,
"learning_rate": 7.749957305845656e-06,
"loss": 0.5582,
"step": 5291
},
{
"epoch": 0.34,
"grad_norm": 0.9074432253837585,
"learning_rate": 7.749100368528047e-06,
"loss": 0.6511,
"step": 5292
},
{
"epoch": 0.34,
"grad_norm": 0.8981906771659851,
"learning_rate": 7.748243315455382e-06,
"loss": 0.624,
"step": 5293
},
{
"epoch": 0.34,
"grad_norm": 0.8196624517440796,
"learning_rate": 7.747386146663753e-06,
"loss": 0.5937,
"step": 5294
},
{
"epoch": 0.34,
"grad_norm": 0.88856440782547,
"learning_rate": 7.746528862189251e-06,
"loss": 0.6291,
"step": 5295
},
{
"epoch": 0.34,
"grad_norm": 0.8899400234222412,
"learning_rate": 7.745671462067974e-06,
"loss": 0.6181,
"step": 5296
},
{
"epoch": 0.34,
"grad_norm": 0.910403847694397,
"learning_rate": 7.74481394633602e-06,
"loss": 0.628,
"step": 5297
},
{
"epoch": 0.34,
"grad_norm": 0.9819753170013428,
"learning_rate": 7.743956315029502e-06,
"loss": 0.6307,
"step": 5298
},
{
"epoch": 0.34,
"grad_norm": 0.9036092758178711,
"learning_rate": 7.743098568184529e-06,
"loss": 0.594,
"step": 5299
},
{
"epoch": 0.34,
"grad_norm": 0.9001262784004211,
"learning_rate": 7.742240705837217e-06,
"loss": 0.5737,
"step": 5300
},
{
"epoch": 0.34,
"grad_norm": 0.8720340132713318,
"learning_rate": 7.741382728023687e-06,
"loss": 0.6166,
"step": 5301
},
{
"epoch": 0.34,
"grad_norm": 0.8694612383842468,
"learning_rate": 7.74052463478007e-06,
"loss": 0.5872,
"step": 5302
},
{
"epoch": 0.34,
"grad_norm": 0.9097409844398499,
"learning_rate": 7.739666426142493e-06,
"loss": 0.5977,
"step": 5303
},
{
"epoch": 0.34,
"grad_norm": 0.9092093706130981,
"learning_rate": 7.738808102147093e-06,
"loss": 0.5701,
"step": 5304
},
{
"epoch": 0.34,
"grad_norm": 0.9413781762123108,
"learning_rate": 7.737949662830012e-06,
"loss": 0.6675,
"step": 5305
},
{
"epoch": 0.34,
"grad_norm": 0.9250045418739319,
"learning_rate": 7.737091108227395e-06,
"loss": 0.6216,
"step": 5306
},
{
"epoch": 0.34,
"grad_norm": 0.9547144174575806,
"learning_rate": 7.736232438375391e-06,
"loss": 0.6255,
"step": 5307
},
{
"epoch": 0.34,
"grad_norm": 0.8688421845436096,
"learning_rate": 7.735373653310161e-06,
"loss": 0.5789,
"step": 5308
},
{
"epoch": 0.34,
"grad_norm": 0.890227198600769,
"learning_rate": 7.73451475306786e-06,
"loss": 0.6464,
"step": 5309
},
{
"epoch": 0.34,
"grad_norm": 0.922257661819458,
"learning_rate": 7.733655737684657e-06,
"loss": 0.5882,
"step": 5310
},
{
"epoch": 0.34,
"grad_norm": 0.8927624821662903,
"learning_rate": 7.732796607196719e-06,
"loss": 0.6208,
"step": 5311
},
{
"epoch": 0.34,
"grad_norm": 0.9111786484718323,
"learning_rate": 7.731937361640223e-06,
"loss": 0.6283,
"step": 5312
},
{
"epoch": 0.34,
"grad_norm": 0.8802262544631958,
"learning_rate": 7.73107800105135e-06,
"loss": 0.6081,
"step": 5313
},
{
"epoch": 0.34,
"grad_norm": 0.9143234491348267,
"learning_rate": 7.730218525466283e-06,
"loss": 0.6289,
"step": 5314
},
{
"epoch": 0.34,
"grad_norm": 0.8903287649154663,
"learning_rate": 7.729358934921209e-06,
"loss": 0.6062,
"step": 5315
},
{
"epoch": 0.34,
"grad_norm": 0.8877756595611572,
"learning_rate": 7.728499229452326e-06,
"loss": 0.5689,
"step": 5316
},
{
"epoch": 0.34,
"grad_norm": 0.9442094564437866,
"learning_rate": 7.727639409095833e-06,
"loss": 0.6616,
"step": 5317
},
{
"epoch": 0.34,
"grad_norm": 0.9445149302482605,
"learning_rate": 7.726779473887933e-06,
"loss": 0.6455,
"step": 5318
},
{
"epoch": 0.34,
"grad_norm": 0.8811274766921997,
"learning_rate": 7.725919423864837e-06,
"loss": 0.6276,
"step": 5319
},
{
"epoch": 0.34,
"grad_norm": 0.8983349800109863,
"learning_rate": 7.725059259062753e-06,
"loss": 0.6362,
"step": 5320
},
{
"epoch": 0.34,
"grad_norm": 0.8891294002532959,
"learning_rate": 7.724198979517905e-06,
"loss": 0.59,
"step": 5321
},
{
"epoch": 0.34,
"grad_norm": 0.9269400238990784,
"learning_rate": 7.723338585266515e-06,
"loss": 0.6242,
"step": 5322
},
{
"epoch": 0.34,
"grad_norm": 0.8992114067077637,
"learning_rate": 7.722478076344812e-06,
"loss": 0.5932,
"step": 5323
},
{
"epoch": 0.34,
"grad_norm": 0.9274572134017944,
"learning_rate": 7.721617452789028e-06,
"loss": 0.6501,
"step": 5324
},
{
"epoch": 0.34,
"grad_norm": 0.8954104781150818,
"learning_rate": 7.7207567146354e-06,
"loss": 0.6378,
"step": 5325
},
{
"epoch": 0.34,
"grad_norm": 0.9126365780830383,
"learning_rate": 7.71989586192017e-06,
"loss": 0.5861,
"step": 5326
},
{
"epoch": 0.34,
"grad_norm": 0.9049072265625,
"learning_rate": 7.719034894679589e-06,
"loss": 0.6177,
"step": 5327
},
{
"epoch": 0.34,
"grad_norm": 0.8507171273231506,
"learning_rate": 7.718173812949908e-06,
"loss": 0.5861,
"step": 5328
},
{
"epoch": 0.34,
"grad_norm": 0.9381729960441589,
"learning_rate": 7.717312616767382e-06,
"loss": 0.6176,
"step": 5329
},
{
"epoch": 0.34,
"grad_norm": 0.8493825793266296,
"learning_rate": 7.716451306168276e-06,
"loss": 0.6133,
"step": 5330
},
{
"epoch": 0.34,
"grad_norm": 0.8845789432525635,
"learning_rate": 7.715589881188852e-06,
"loss": 0.5937,
"step": 5331
},
{
"epoch": 0.34,
"grad_norm": 0.9143087863922119,
"learning_rate": 7.71472834186539e-06,
"loss": 0.5789,
"step": 5332
},
{
"epoch": 0.34,
"grad_norm": 0.9418982863426208,
"learning_rate": 7.713866688234157e-06,
"loss": 0.6042,
"step": 5333
},
{
"epoch": 0.34,
"grad_norm": 0.8240166902542114,
"learning_rate": 7.713004920331441e-06,
"loss": 0.6097,
"step": 5334
},
{
"epoch": 0.34,
"grad_norm": 0.8444035649299622,
"learning_rate": 7.712143038193525e-06,
"loss": 0.5586,
"step": 5335
},
{
"epoch": 0.34,
"grad_norm": 0.8501242995262146,
"learning_rate": 7.7112810418567e-06,
"loss": 0.5876,
"step": 5336
},
{
"epoch": 0.34,
"grad_norm": 0.8870479464530945,
"learning_rate": 7.710418931357263e-06,
"loss": 0.6137,
"step": 5337
},
{
"epoch": 0.34,
"grad_norm": 0.8917999267578125,
"learning_rate": 7.709556706731514e-06,
"loss": 0.6208,
"step": 5338
},
{
"epoch": 0.34,
"grad_norm": 0.8900894522666931,
"learning_rate": 7.708694368015758e-06,
"loss": 0.5654,
"step": 5339
},
{
"epoch": 0.34,
"grad_norm": 0.9891944527626038,
"learning_rate": 7.707831915246304e-06,
"loss": 0.5949,
"step": 5340
},
{
"epoch": 0.34,
"grad_norm": 0.9004802703857422,
"learning_rate": 7.706969348459469e-06,
"loss": 0.6573,
"step": 5341
},
{
"epoch": 0.34,
"grad_norm": 0.9725054502487183,
"learning_rate": 7.70610666769157e-06,
"loss": 0.6963,
"step": 5342
},
{
"epoch": 0.34,
"grad_norm": 0.895476758480072,
"learning_rate": 7.705243872978935e-06,
"loss": 0.5893,
"step": 5343
},
{
"epoch": 0.34,
"grad_norm": 0.898909866809845,
"learning_rate": 7.704380964357889e-06,
"loss": 0.5709,
"step": 5344
},
{
"epoch": 0.34,
"grad_norm": 0.8911014795303345,
"learning_rate": 7.70351794186477e-06,
"loss": 0.631,
"step": 5345
},
{
"epoch": 0.34,
"grad_norm": 0.8815633654594421,
"learning_rate": 7.702654805535915e-06,
"loss": 0.5953,
"step": 5346
},
{
"epoch": 0.34,
"grad_norm": 0.8706081509590149,
"learning_rate": 7.701791555407669e-06,
"loss": 0.5798,
"step": 5347
},
{
"epoch": 0.34,
"grad_norm": 0.839159369468689,
"learning_rate": 7.700928191516378e-06,
"loss": 0.6532,
"step": 5348
},
{
"epoch": 0.34,
"grad_norm": 0.8313089609146118,
"learning_rate": 7.700064713898398e-06,
"loss": 0.6238,
"step": 5349
},
{
"epoch": 0.34,
"grad_norm": 0.9056754112243652,
"learning_rate": 7.699201122590086e-06,
"loss": 0.6051,
"step": 5350
},
{
"epoch": 0.34,
"grad_norm": 0.8567859530448914,
"learning_rate": 7.6983374176278e-06,
"loss": 0.6282,
"step": 5351
},
{
"epoch": 0.34,
"grad_norm": 0.8340045809745789,
"learning_rate": 7.697473599047918e-06,
"loss": 0.605,
"step": 5352
},
{
"epoch": 0.34,
"grad_norm": 0.8645469546318054,
"learning_rate": 7.696609666886805e-06,
"loss": 0.6075,
"step": 5353
},
{
"epoch": 0.34,
"grad_norm": 0.8319426774978638,
"learning_rate": 7.695745621180839e-06,
"loss": 0.5254,
"step": 5354
},
{
"epoch": 0.34,
"grad_norm": 0.824740469455719,
"learning_rate": 7.694881461966402e-06,
"loss": 0.636,
"step": 5355
},
{
"epoch": 0.34,
"grad_norm": 0.8777102828025818,
"learning_rate": 7.694017189279882e-06,
"loss": 0.6096,
"step": 5356
},
{
"epoch": 0.34,
"grad_norm": 0.8239105939865112,
"learning_rate": 7.69315280315767e-06,
"loss": 0.5593,
"step": 5357
},
{
"epoch": 0.34,
"grad_norm": 0.9346814751625061,
"learning_rate": 7.692288303636163e-06,
"loss": 0.6136,
"step": 5358
},
{
"epoch": 0.34,
"grad_norm": 0.8404369950294495,
"learning_rate": 7.69142369075176e-06,
"loss": 0.5977,
"step": 5359
},
{
"epoch": 0.34,
"grad_norm": 0.8172876834869385,
"learning_rate": 7.690558964540872e-06,
"loss": 0.6095,
"step": 5360
},
{
"epoch": 0.34,
"grad_norm": 0.913045346736908,
"learning_rate": 7.6896941250399e-06,
"loss": 0.5932,
"step": 5361
},
{
"epoch": 0.34,
"grad_norm": 0.8815491795539856,
"learning_rate": 7.688829172285267e-06,
"loss": 0.6035,
"step": 5362
},
{
"epoch": 0.34,
"grad_norm": 0.8652727007865906,
"learning_rate": 7.687964106313392e-06,
"loss": 0.5792,
"step": 5363
},
{
"epoch": 0.34,
"grad_norm": 0.8789160847663879,
"learning_rate": 7.687098927160701e-06,
"loss": 0.6358,
"step": 5364
},
{
"epoch": 0.34,
"grad_norm": 0.8862786889076233,
"learning_rate": 7.68623363486362e-06,
"loss": 0.6316,
"step": 5365
},
{
"epoch": 0.34,
"grad_norm": 0.9177654981613159,
"learning_rate": 7.685368229458584e-06,
"loss": 0.5892,
"step": 5366
},
{
"epoch": 0.34,
"grad_norm": 0.9102894067764282,
"learning_rate": 7.684502710982035e-06,
"loss": 0.6003,
"step": 5367
},
{
"epoch": 0.34,
"grad_norm": 0.8236129283905029,
"learning_rate": 7.683637079470418e-06,
"loss": 0.6175,
"step": 5368
},
{
"epoch": 0.34,
"grad_norm": 0.886927604675293,
"learning_rate": 7.682771334960178e-06,
"loss": 0.5757,
"step": 5369
},
{
"epoch": 0.34,
"grad_norm": 0.8804916739463806,
"learning_rate": 7.681905477487769e-06,
"loss": 0.6581,
"step": 5370
},
{
"epoch": 0.34,
"grad_norm": 0.8064201474189758,
"learning_rate": 7.68103950708965e-06,
"loss": 0.5431,
"step": 5371
},
{
"epoch": 0.34,
"grad_norm": 0.8837984204292297,
"learning_rate": 7.680173423802282e-06,
"loss": 0.6277,
"step": 5372
},
{
"epoch": 0.34,
"grad_norm": 0.9009150266647339,
"learning_rate": 7.679307227662136e-06,
"loss": 0.6023,
"step": 5373
},
{
"epoch": 0.34,
"grad_norm": 0.9290765523910522,
"learning_rate": 7.678440918705686e-06,
"loss": 0.6381,
"step": 5374
},
{
"epoch": 0.34,
"grad_norm": 0.9562059640884399,
"learning_rate": 7.677574496969404e-06,
"loss": 0.6542,
"step": 5375
},
{
"epoch": 0.34,
"grad_norm": 0.9217070937156677,
"learning_rate": 7.676707962489775e-06,
"loss": 0.6375,
"step": 5376
},
{
"epoch": 0.34,
"grad_norm": 0.8787111043930054,
"learning_rate": 7.675841315303284e-06,
"loss": 0.6749,
"step": 5377
},
{
"epoch": 0.34,
"grad_norm": 0.9050287008285522,
"learning_rate": 7.674974555446425e-06,
"loss": 0.6174,
"step": 5378
},
{
"epoch": 0.34,
"grad_norm": 0.8558552861213684,
"learning_rate": 7.674107682955693e-06,
"loss": 0.5902,
"step": 5379
},
{
"epoch": 0.34,
"grad_norm": 0.8936824202537537,
"learning_rate": 7.67324069786759e-06,
"loss": 0.6121,
"step": 5380
},
{
"epoch": 0.34,
"grad_norm": 0.9137732982635498,
"learning_rate": 7.67237360021862e-06,
"loss": 0.6546,
"step": 5381
},
{
"epoch": 0.34,
"grad_norm": 0.9589877128601074,
"learning_rate": 7.671506390045293e-06,
"loss": 0.6522,
"step": 5382
},
{
"epoch": 0.34,
"grad_norm": 0.9142245054244995,
"learning_rate": 7.670639067384126e-06,
"loss": 0.6284,
"step": 5383
},
{
"epoch": 0.34,
"grad_norm": 0.8741958141326904,
"learning_rate": 7.66977163227164e-06,
"loss": 0.5957,
"step": 5384
},
{
"epoch": 0.34,
"grad_norm": 0.9198216795921326,
"learning_rate": 7.668904084744357e-06,
"loss": 0.5629,
"step": 5385
},
{
"epoch": 0.34,
"grad_norm": 0.8666446805000305,
"learning_rate": 7.668036424838808e-06,
"loss": 0.5829,
"step": 5386
},
{
"epoch": 0.34,
"grad_norm": 0.8472068309783936,
"learning_rate": 7.667168652591524e-06,
"loss": 0.6183,
"step": 5387
},
{
"epoch": 0.34,
"grad_norm": 0.960817277431488,
"learning_rate": 7.66630076803905e-06,
"loss": 0.6487,
"step": 5388
},
{
"epoch": 0.34,
"grad_norm": 0.8368389010429382,
"learning_rate": 7.665432771217922e-06,
"loss": 0.5899,
"step": 5389
},
{
"epoch": 0.34,
"grad_norm": 0.8463855385780334,
"learning_rate": 7.664564662164696e-06,
"loss": 0.6046,
"step": 5390
},
{
"epoch": 0.34,
"grad_norm": 0.9495236277580261,
"learning_rate": 7.66369644091592e-06,
"loss": 0.6498,
"step": 5391
},
{
"epoch": 0.34,
"grad_norm": 0.8692662119865417,
"learning_rate": 7.662828107508153e-06,
"loss": 0.6034,
"step": 5392
},
{
"epoch": 0.34,
"grad_norm": 0.8595423698425293,
"learning_rate": 7.661959661977958e-06,
"loss": 0.5903,
"step": 5393
},
{
"epoch": 0.34,
"grad_norm": 0.9107503890991211,
"learning_rate": 7.661091104361902e-06,
"loss": 0.6285,
"step": 5394
},
{
"epoch": 0.34,
"grad_norm": 0.8617141842842102,
"learning_rate": 7.660222434696556e-06,
"loss": 0.6372,
"step": 5395
},
{
"epoch": 0.34,
"grad_norm": 0.8542279005050659,
"learning_rate": 7.6593536530185e-06,
"loss": 0.6045,
"step": 5396
},
{
"epoch": 0.34,
"grad_norm": 0.9125630855560303,
"learning_rate": 7.658484759364308e-06,
"loss": 0.6111,
"step": 5397
},
{
"epoch": 0.34,
"grad_norm": 0.9282498359680176,
"learning_rate": 7.657615753770575e-06,
"loss": 0.6401,
"step": 5398
},
{
"epoch": 0.34,
"grad_norm": 0.760006844997406,
"learning_rate": 7.656746636273889e-06,
"loss": 0.5258,
"step": 5399
},
{
"epoch": 0.34,
"grad_norm": 0.8629961013793945,
"learning_rate": 7.655877406910841e-06,
"loss": 0.5787,
"step": 5400
},
{
"epoch": 0.34,
"grad_norm": 0.9403144121170044,
"learning_rate": 7.655008065718036e-06,
"loss": 0.6448,
"step": 5401
},
{
"epoch": 0.34,
"grad_norm": 0.8610935211181641,
"learning_rate": 7.654138612732078e-06,
"loss": 0.6125,
"step": 5402
},
{
"epoch": 0.34,
"grad_norm": 0.9002783298492432,
"learning_rate": 7.653269047989575e-06,
"loss": 0.5981,
"step": 5403
},
{
"epoch": 0.34,
"grad_norm": 0.8649095892906189,
"learning_rate": 7.652399371527142e-06,
"loss": 0.6343,
"step": 5404
},
{
"epoch": 0.34,
"grad_norm": 0.9302815794944763,
"learning_rate": 7.651529583381398e-06,
"loss": 0.6527,
"step": 5405
},
{
"epoch": 0.34,
"grad_norm": 0.9225360751152039,
"learning_rate": 7.65065968358897e-06,
"loss": 0.6909,
"step": 5406
},
{
"epoch": 0.34,
"grad_norm": 0.9352942109107971,
"learning_rate": 7.649789672186483e-06,
"loss": 0.6563,
"step": 5407
},
{
"epoch": 0.34,
"grad_norm": 0.9485490918159485,
"learning_rate": 7.648919549210567e-06,
"loss": 0.6339,
"step": 5408
},
{
"epoch": 0.34,
"grad_norm": 0.8463318347930908,
"learning_rate": 7.648049314697869e-06,
"loss": 0.5555,
"step": 5409
},
{
"epoch": 0.34,
"grad_norm": 0.8683443069458008,
"learning_rate": 7.647178968685024e-06,
"loss": 0.5861,
"step": 5410
},
{
"epoch": 0.34,
"grad_norm": 0.95866459608078,
"learning_rate": 7.646308511208682e-06,
"loss": 0.6818,
"step": 5411
},
{
"epoch": 0.34,
"grad_norm": 0.856253981590271,
"learning_rate": 7.645437942305491e-06,
"loss": 0.5561,
"step": 5412
},
{
"epoch": 0.34,
"grad_norm": 0.9046028852462769,
"learning_rate": 7.644567262012115e-06,
"loss": 0.6405,
"step": 5413
},
{
"epoch": 0.34,
"grad_norm": 0.8811362981796265,
"learning_rate": 7.643696470365209e-06,
"loss": 0.6266,
"step": 5414
},
{
"epoch": 0.34,
"grad_norm": 0.8369075059890747,
"learning_rate": 7.642825567401444e-06,
"loss": 0.5538,
"step": 5415
},
{
"epoch": 0.34,
"grad_norm": 0.9165283441543579,
"learning_rate": 7.641954553157487e-06,
"loss": 0.5952,
"step": 5416
},
{
"epoch": 0.34,
"grad_norm": 0.8416288495063782,
"learning_rate": 7.641083427670014e-06,
"loss": 0.6023,
"step": 5417
},
{
"epoch": 0.34,
"grad_norm": 0.8895038962364197,
"learning_rate": 7.640212190975707e-06,
"loss": 0.5969,
"step": 5418
},
{
"epoch": 0.34,
"grad_norm": 0.8565618991851807,
"learning_rate": 7.639340843111247e-06,
"loss": 0.5769,
"step": 5419
},
{
"epoch": 0.34,
"grad_norm": 0.825664222240448,
"learning_rate": 7.638469384113328e-06,
"loss": 0.5199,
"step": 5420
},
{
"epoch": 0.34,
"grad_norm": 0.8779264092445374,
"learning_rate": 7.637597814018638e-06,
"loss": 0.5795,
"step": 5421
},
{
"epoch": 0.34,
"grad_norm": 0.8773237466812134,
"learning_rate": 7.636726132863883e-06,
"loss": 0.6019,
"step": 5422
},
{
"epoch": 0.34,
"grad_norm": 0.9273678660392761,
"learning_rate": 7.635854340685762e-06,
"loss": 0.6015,
"step": 5423
},
{
"epoch": 0.34,
"grad_norm": 0.9024190902709961,
"learning_rate": 7.634982437520984e-06,
"loss": 0.5763,
"step": 5424
},
{
"epoch": 0.34,
"grad_norm": 0.8656637668609619,
"learning_rate": 7.634110423406262e-06,
"loss": 0.5785,
"step": 5425
},
{
"epoch": 0.34,
"grad_norm": 0.8862728476524353,
"learning_rate": 7.633238298378315e-06,
"loss": 0.6294,
"step": 5426
},
{
"epoch": 0.34,
"grad_norm": 0.8349065184593201,
"learning_rate": 7.632366062473862e-06,
"loss": 0.5862,
"step": 5427
},
{
"epoch": 0.34,
"grad_norm": 0.8949868083000183,
"learning_rate": 7.631493715729632e-06,
"loss": 0.5676,
"step": 5428
},
{
"epoch": 0.34,
"grad_norm": 0.897675633430481,
"learning_rate": 7.630621258182354e-06,
"loss": 0.5963,
"step": 5429
},
{
"epoch": 0.34,
"grad_norm": 0.8373680114746094,
"learning_rate": 7.62974868986877e-06,
"loss": 0.5706,
"step": 5430
},
{
"epoch": 0.34,
"grad_norm": 0.9069997072219849,
"learning_rate": 7.628876010825614e-06,
"loss": 0.6501,
"step": 5431
},
{
"epoch": 0.34,
"grad_norm": 0.8189912438392639,
"learning_rate": 7.628003221089635e-06,
"loss": 0.5475,
"step": 5432
},
{
"epoch": 0.34,
"grad_norm": 0.9497076869010925,
"learning_rate": 7.6271303206975825e-06,
"loss": 0.6459,
"step": 5433
},
{
"epoch": 0.34,
"grad_norm": 0.8492891788482666,
"learning_rate": 7.626257309686211e-06,
"loss": 0.5883,
"step": 5434
},
{
"epoch": 0.34,
"grad_norm": 0.8823180198669434,
"learning_rate": 7.6253841880922805e-06,
"loss": 0.5968,
"step": 5435
},
{
"epoch": 0.34,
"grad_norm": 0.8924271464347839,
"learning_rate": 7.624510955952555e-06,
"loss": 0.5706,
"step": 5436
},
{
"epoch": 0.34,
"grad_norm": 0.8900327682495117,
"learning_rate": 7.623637613303805e-06,
"loss": 0.5903,
"step": 5437
},
{
"epoch": 0.34,
"grad_norm": 0.8470126986503601,
"learning_rate": 7.6227641601827996e-06,
"loss": 0.553,
"step": 5438
},
{
"epoch": 0.34,
"grad_norm": 0.8747822642326355,
"learning_rate": 7.62189059662632e-06,
"loss": 0.6095,
"step": 5439
},
{
"epoch": 0.34,
"grad_norm": 0.8955729603767395,
"learning_rate": 7.621016922671147e-06,
"loss": 0.5983,
"step": 5440
},
{
"epoch": 0.34,
"grad_norm": 0.8502835631370544,
"learning_rate": 7.620143138354072e-06,
"loss": 0.5978,
"step": 5441
},
{
"epoch": 0.34,
"grad_norm": 0.8627199530601501,
"learning_rate": 7.6192692437118825e-06,
"loss": 0.6227,
"step": 5442
},
{
"epoch": 0.34,
"grad_norm": 0.930798351764679,
"learning_rate": 7.618395238781377e-06,
"loss": 0.6769,
"step": 5443
},
{
"epoch": 0.34,
"grad_norm": 0.889930009841919,
"learning_rate": 7.617521123599356e-06,
"loss": 0.6135,
"step": 5444
},
{
"epoch": 0.34,
"grad_norm": 0.910830557346344,
"learning_rate": 7.616646898202629e-06,
"loss": 0.6337,
"step": 5445
},
{
"epoch": 0.35,
"grad_norm": 0.867741048336029,
"learning_rate": 7.6157725626280014e-06,
"loss": 0.5566,
"step": 5446
},
{
"epoch": 0.35,
"grad_norm": 0.8112003207206726,
"learning_rate": 7.61489811691229e-06,
"loss": 0.57,
"step": 5447
},
{
"epoch": 0.35,
"grad_norm": 0.9317660927772522,
"learning_rate": 7.614023561092319e-06,
"loss": 0.6141,
"step": 5448
},
{
"epoch": 0.35,
"grad_norm": 0.8938388228416443,
"learning_rate": 7.613148895204906e-06,
"loss": 0.6114,
"step": 5449
},
{
"epoch": 0.35,
"grad_norm": 0.8985342979431152,
"learning_rate": 7.612274119286884e-06,
"loss": 0.626,
"step": 5450
},
{
"epoch": 0.35,
"grad_norm": 0.9427514672279358,
"learning_rate": 7.611399233375087e-06,
"loss": 0.6303,
"step": 5451
},
{
"epoch": 0.35,
"grad_norm": 0.9037792682647705,
"learning_rate": 7.610524237506354e-06,
"loss": 0.6456,
"step": 5452
},
{
"epoch": 0.35,
"grad_norm": 0.8891815543174744,
"learning_rate": 7.6096491317175246e-06,
"loss": 0.6235,
"step": 5453
},
{
"epoch": 0.35,
"grad_norm": 0.8519503474235535,
"learning_rate": 7.608773916045449e-06,
"loss": 0.5835,
"step": 5454
},
{
"epoch": 0.35,
"grad_norm": 0.8248928785324097,
"learning_rate": 7.607898590526979e-06,
"loss": 0.5891,
"step": 5455
},
{
"epoch": 0.35,
"grad_norm": 0.8195099234580994,
"learning_rate": 7.607023155198973e-06,
"loss": 0.5548,
"step": 5456
},
{
"epoch": 0.35,
"grad_norm": 0.8967714309692383,
"learning_rate": 7.606147610098289e-06,
"loss": 0.6207,
"step": 5457
},
{
"epoch": 0.35,
"grad_norm": 0.8687184453010559,
"learning_rate": 7.605271955261796e-06,
"loss": 0.6149,
"step": 5458
},
{
"epoch": 0.35,
"grad_norm": 0.9524543285369873,
"learning_rate": 7.604396190726364e-06,
"loss": 0.5933,
"step": 5459
},
{
"epoch": 0.35,
"grad_norm": 0.9514956474304199,
"learning_rate": 7.603520316528869e-06,
"loss": 0.6466,
"step": 5460
},
{
"epoch": 0.35,
"grad_norm": 0.89705491065979,
"learning_rate": 7.60264433270619e-06,
"loss": 0.608,
"step": 5461
},
{
"epoch": 0.35,
"grad_norm": 0.803554117679596,
"learning_rate": 7.601768239295213e-06,
"loss": 0.5203,
"step": 5462
},
{
"epoch": 0.35,
"grad_norm": 0.9711521863937378,
"learning_rate": 7.600892036332825e-06,
"loss": 0.6572,
"step": 5463
},
{
"epoch": 0.35,
"grad_norm": 0.8963906168937683,
"learning_rate": 7.600015723855922e-06,
"loss": 0.6329,
"step": 5464
},
{
"epoch": 0.35,
"grad_norm": 0.9670395255088806,
"learning_rate": 7.599139301901401e-06,
"loss": 0.6172,
"step": 5465
},
{
"epoch": 0.35,
"grad_norm": 0.9355558156967163,
"learning_rate": 7.5982627705061666e-06,
"loss": 0.6574,
"step": 5466
},
{
"epoch": 0.35,
"grad_norm": 0.8632118105888367,
"learning_rate": 7.597386129707126e-06,
"loss": 0.6021,
"step": 5467
},
{
"epoch": 0.35,
"grad_norm": 0.8859368562698364,
"learning_rate": 7.596509379541191e-06,
"loss": 0.5763,
"step": 5468
},
{
"epoch": 0.35,
"grad_norm": 0.8613402843475342,
"learning_rate": 7.595632520045277e-06,
"loss": 0.6077,
"step": 5469
},
{
"epoch": 0.35,
"grad_norm": 0.8863072395324707,
"learning_rate": 7.594755551256308e-06,
"loss": 0.5881,
"step": 5470
},
{
"epoch": 0.35,
"grad_norm": 0.9150487780570984,
"learning_rate": 7.593878473211209e-06,
"loss": 0.6351,
"step": 5471
},
{
"epoch": 0.35,
"grad_norm": 0.8424960970878601,
"learning_rate": 7.593001285946913e-06,
"loss": 0.5416,
"step": 5472
},
{
"epoch": 0.35,
"grad_norm": 0.8403632640838623,
"learning_rate": 7.592123989500351e-06,
"loss": 0.6015,
"step": 5473
},
{
"epoch": 0.35,
"grad_norm": 0.9487394094467163,
"learning_rate": 7.591246583908465e-06,
"loss": 0.6518,
"step": 5474
},
{
"epoch": 0.35,
"grad_norm": 0.915139377117157,
"learning_rate": 7.590369069208201e-06,
"loss": 0.6258,
"step": 5475
},
{
"epoch": 0.35,
"grad_norm": 0.8754032254219055,
"learning_rate": 7.589491445436505e-06,
"loss": 0.6127,
"step": 5476
},
{
"epoch": 0.35,
"grad_norm": 0.8421617746353149,
"learning_rate": 7.588613712630334e-06,
"loss": 0.5761,
"step": 5477
},
{
"epoch": 0.35,
"grad_norm": 0.8702454566955566,
"learning_rate": 7.587735870826643e-06,
"loss": 0.5819,
"step": 5478
},
{
"epoch": 0.35,
"grad_norm": 0.8792976140975952,
"learning_rate": 7.586857920062399e-06,
"loss": 0.6374,
"step": 5479
},
{
"epoch": 0.35,
"grad_norm": 0.9013099074363708,
"learning_rate": 7.585979860374566e-06,
"loss": 0.6053,
"step": 5480
},
{
"epoch": 0.35,
"grad_norm": 0.8370474576950073,
"learning_rate": 7.5851016918001165e-06,
"loss": 0.5803,
"step": 5481
},
{
"epoch": 0.35,
"grad_norm": 0.8300336003303528,
"learning_rate": 7.584223414376028e-06,
"loss": 0.5983,
"step": 5482
},
{
"epoch": 0.35,
"grad_norm": 0.9231306910514832,
"learning_rate": 7.583345028139282e-06,
"loss": 0.6231,
"step": 5483
},
{
"epoch": 0.35,
"grad_norm": 0.8919202089309692,
"learning_rate": 7.582466533126863e-06,
"loss": 0.6033,
"step": 5484
},
{
"epoch": 0.35,
"grad_norm": 0.8878291845321655,
"learning_rate": 7.581587929375761e-06,
"loss": 0.6483,
"step": 5485
},
{
"epoch": 0.35,
"grad_norm": 0.7955220341682434,
"learning_rate": 7.580709216922973e-06,
"loss": 0.6065,
"step": 5486
},
{
"epoch": 0.35,
"grad_norm": 0.9067592620849609,
"learning_rate": 7.579830395805499e-06,
"loss": 0.6261,
"step": 5487
},
{
"epoch": 0.35,
"grad_norm": 0.9961644411087036,
"learning_rate": 7.578951466060341e-06,
"loss": 0.6041,
"step": 5488
},
{
"epoch": 0.35,
"grad_norm": 0.8630528450012207,
"learning_rate": 7.578072427724506e-06,
"loss": 0.5756,
"step": 5489
},
{
"epoch": 0.35,
"grad_norm": 0.8708525896072388,
"learning_rate": 7.577193280835011e-06,
"loss": 0.6126,
"step": 5490
},
{
"epoch": 0.35,
"grad_norm": 0.8305570483207703,
"learning_rate": 7.5763140254288716e-06,
"loss": 0.5874,
"step": 5491
},
{
"epoch": 0.35,
"grad_norm": 0.9040376543998718,
"learning_rate": 7.575434661543113e-06,
"loss": 0.6401,
"step": 5492
},
{
"epoch": 0.35,
"grad_norm": 0.9144179224967957,
"learning_rate": 7.574555189214756e-06,
"loss": 0.6298,
"step": 5493
},
{
"epoch": 0.35,
"grad_norm": 0.9132001399993896,
"learning_rate": 7.573675608480841e-06,
"loss": 0.5974,
"step": 5494
},
{
"epoch": 0.35,
"grad_norm": 0.8850140571594238,
"learning_rate": 7.5727959193783974e-06,
"loss": 0.6335,
"step": 5495
},
{
"epoch": 0.35,
"grad_norm": 0.9785036444664001,
"learning_rate": 7.571916121944467e-06,
"loss": 0.5492,
"step": 5496
},
{
"epoch": 0.35,
"grad_norm": 0.8609431385993958,
"learning_rate": 7.571036216216097e-06,
"loss": 0.5885,
"step": 5497
},
{
"epoch": 0.35,
"grad_norm": 0.9320406317710876,
"learning_rate": 7.570156202230335e-06,
"loss": 0.6477,
"step": 5498
},
{
"epoch": 0.35,
"grad_norm": 0.8788042664527893,
"learning_rate": 7.569276080024237e-06,
"loss": 0.6574,
"step": 5499
},
{
"epoch": 0.35,
"grad_norm": 0.8510634899139404,
"learning_rate": 7.5683958496348596e-06,
"loss": 0.6256,
"step": 5500
},
{
"epoch": 0.35,
"grad_norm": 0.8864413499832153,
"learning_rate": 7.567515511099268e-06,
"loss": 0.5793,
"step": 5501
},
{
"epoch": 0.35,
"grad_norm": 0.860865592956543,
"learning_rate": 7.56663506445453e-06,
"loss": 0.6229,
"step": 5502
},
{
"epoch": 0.35,
"grad_norm": 0.913250744342804,
"learning_rate": 7.5657545097377205e-06,
"loss": 0.5914,
"step": 5503
},
{
"epoch": 0.35,
"grad_norm": 0.8102872967720032,
"learning_rate": 7.564873846985912e-06,
"loss": 0.5833,
"step": 5504
},
{
"epoch": 0.35,
"grad_norm": 0.8643232583999634,
"learning_rate": 7.563993076236189e-06,
"loss": 0.6061,
"step": 5505
},
{
"epoch": 0.35,
"grad_norm": 0.9757564067840576,
"learning_rate": 7.563112197525637e-06,
"loss": 0.6515,
"step": 5506
},
{
"epoch": 0.35,
"grad_norm": 0.8703305721282959,
"learning_rate": 7.562231210891347e-06,
"loss": 0.5819,
"step": 5507
},
{
"epoch": 0.35,
"grad_norm": 0.8819752931594849,
"learning_rate": 7.561350116370413e-06,
"loss": 0.5966,
"step": 5508
},
{
"epoch": 0.35,
"grad_norm": 0.8967403173446655,
"learning_rate": 7.560468913999937e-06,
"loss": 0.6338,
"step": 5509
},
{
"epoch": 0.35,
"grad_norm": 0.8586651682853699,
"learning_rate": 7.559587603817022e-06,
"loss": 0.6135,
"step": 5510
},
{
"epoch": 0.35,
"grad_norm": 0.8888817429542542,
"learning_rate": 7.558706185858777e-06,
"loss": 0.6236,
"step": 5511
},
{
"epoch": 0.35,
"grad_norm": 0.8927393555641174,
"learning_rate": 7.557824660162316e-06,
"loss": 0.6012,
"step": 5512
},
{
"epoch": 0.35,
"grad_norm": 0.9518846273422241,
"learning_rate": 7.556943026764756e-06,
"loss": 0.5581,
"step": 5513
},
{
"epoch": 0.35,
"grad_norm": 0.8957030773162842,
"learning_rate": 7.55606128570322e-06,
"loss": 0.5849,
"step": 5514
},
{
"epoch": 0.35,
"grad_norm": 0.9107878804206848,
"learning_rate": 7.5551794370148366e-06,
"loss": 0.6504,
"step": 5515
},
{
"epoch": 0.35,
"grad_norm": 0.8559346795082092,
"learning_rate": 7.554297480736734e-06,
"loss": 0.5891,
"step": 5516
},
{
"epoch": 0.35,
"grad_norm": 0.8798370361328125,
"learning_rate": 7.553415416906051e-06,
"loss": 0.6028,
"step": 5517
},
{
"epoch": 0.35,
"grad_norm": 0.9414769411087036,
"learning_rate": 7.552533245559927e-06,
"loss": 0.6174,
"step": 5518
},
{
"epoch": 0.35,
"grad_norm": 0.8583175539970398,
"learning_rate": 7.551650966735509e-06,
"loss": 0.5641,
"step": 5519
},
{
"epoch": 0.35,
"grad_norm": 0.8779864311218262,
"learning_rate": 7.550768580469945e-06,
"loss": 0.6283,
"step": 5520
},
{
"epoch": 0.35,
"grad_norm": 0.8857389092445374,
"learning_rate": 7.549886086800389e-06,
"loss": 0.5855,
"step": 5521
},
{
"epoch": 0.35,
"grad_norm": 0.8128264546394348,
"learning_rate": 7.549003485763999e-06,
"loss": 0.4986,
"step": 5522
},
{
"epoch": 0.35,
"grad_norm": 0.9185560941696167,
"learning_rate": 7.548120777397941e-06,
"loss": 0.6204,
"step": 5523
},
{
"epoch": 0.35,
"grad_norm": 0.9126561284065247,
"learning_rate": 7.547237961739382e-06,
"loss": 0.6516,
"step": 5524
},
{
"epoch": 0.35,
"grad_norm": 0.8364182114601135,
"learning_rate": 7.546355038825492e-06,
"loss": 0.573,
"step": 5525
},
{
"epoch": 0.35,
"grad_norm": 0.8545491099357605,
"learning_rate": 7.545472008693451e-06,
"loss": 0.6251,
"step": 5526
},
{
"epoch": 0.35,
"grad_norm": 0.9368882775306702,
"learning_rate": 7.544588871380439e-06,
"loss": 0.6421,
"step": 5527
},
{
"epoch": 0.35,
"grad_norm": 0.8525586128234863,
"learning_rate": 7.54370562692364e-06,
"loss": 0.6311,
"step": 5528
},
{
"epoch": 0.35,
"grad_norm": 0.8583645820617676,
"learning_rate": 7.542822275360246e-06,
"loss": 0.6295,
"step": 5529
},
{
"epoch": 0.35,
"grad_norm": 0.9096074104309082,
"learning_rate": 7.541938816727453e-06,
"loss": 0.6628,
"step": 5530
},
{
"epoch": 0.35,
"grad_norm": 0.8697735071182251,
"learning_rate": 7.5410552510624594e-06,
"loss": 0.5986,
"step": 5531
},
{
"epoch": 0.35,
"grad_norm": 0.869107186794281,
"learning_rate": 7.540171578402466e-06,
"loss": 0.6293,
"step": 5532
},
{
"epoch": 0.35,
"grad_norm": 0.8785176873207092,
"learning_rate": 7.539287798784688e-06,
"loss": 0.5971,
"step": 5533
},
{
"epoch": 0.35,
"grad_norm": 0.9223856329917908,
"learning_rate": 7.538403912246333e-06,
"loss": 0.5812,
"step": 5534
},
{
"epoch": 0.35,
"grad_norm": 0.8824152946472168,
"learning_rate": 7.537519918824619e-06,
"loss": 0.5718,
"step": 5535
},
{
"epoch": 0.35,
"grad_norm": 0.8068228960037231,
"learning_rate": 7.5366358185567676e-06,
"loss": 0.5295,
"step": 5536
},
{
"epoch": 0.35,
"grad_norm": 0.8570433259010315,
"learning_rate": 7.5357516114800075e-06,
"loss": 0.6182,
"step": 5537
},
{
"epoch": 0.35,
"grad_norm": 0.8204308748245239,
"learning_rate": 7.534867297631569e-06,
"loss": 0.5698,
"step": 5538
},
{
"epoch": 0.35,
"grad_norm": 0.9290466904640198,
"learning_rate": 7.533982877048685e-06,
"loss": 0.6612,
"step": 5539
},
{
"epoch": 0.35,
"grad_norm": 0.925410807132721,
"learning_rate": 7.5330983497685975e-06,
"loss": 0.6015,
"step": 5540
},
{
"epoch": 0.35,
"grad_norm": 0.8890109658241272,
"learning_rate": 7.532213715828551e-06,
"loss": 0.5958,
"step": 5541
},
{
"epoch": 0.35,
"grad_norm": 0.8188264966011047,
"learning_rate": 7.531328975265795e-06,
"loss": 0.6184,
"step": 5542
},
{
"epoch": 0.35,
"grad_norm": 0.8602173328399658,
"learning_rate": 7.53044412811758e-06,
"loss": 0.5672,
"step": 5543
},
{
"epoch": 0.35,
"grad_norm": 0.8795886039733887,
"learning_rate": 7.529559174421167e-06,
"loss": 0.6153,
"step": 5544
},
{
"epoch": 0.35,
"grad_norm": 0.8424326777458191,
"learning_rate": 7.528674114213816e-06,
"loss": 0.6177,
"step": 5545
},
{
"epoch": 0.35,
"grad_norm": 0.9181726574897766,
"learning_rate": 7.527788947532795e-06,
"loss": 0.6457,
"step": 5546
},
{
"epoch": 0.35,
"grad_norm": 0.980117678642273,
"learning_rate": 7.526903674415373e-06,
"loss": 0.6007,
"step": 5547
},
{
"epoch": 0.35,
"grad_norm": 0.9220601916313171,
"learning_rate": 7.526018294898832e-06,
"loss": 0.6301,
"step": 5548
},
{
"epoch": 0.35,
"grad_norm": 0.9367707371711731,
"learning_rate": 7.525132809020443e-06,
"loss": 0.5758,
"step": 5549
},
{
"epoch": 0.35,
"grad_norm": 0.8081425428390503,
"learning_rate": 7.524247216817499e-06,
"loss": 0.5754,
"step": 5550
},
{
"epoch": 0.35,
"grad_norm": 0.8742004632949829,
"learning_rate": 7.5233615183272836e-06,
"loss": 0.5852,
"step": 5551
},
{
"epoch": 0.35,
"grad_norm": 0.9098623394966125,
"learning_rate": 7.522475713587095e-06,
"loss": 0.6307,
"step": 5552
},
{
"epoch": 0.35,
"grad_norm": 0.8602703809738159,
"learning_rate": 7.521589802634228e-06,
"loss": 0.633,
"step": 5553
},
{
"epoch": 0.35,
"grad_norm": 0.8878544569015503,
"learning_rate": 7.520703785505987e-06,
"loss": 0.6116,
"step": 5554
},
{
"epoch": 0.35,
"grad_norm": 0.8622645735740662,
"learning_rate": 7.519817662239678e-06,
"loss": 0.5874,
"step": 5555
},
{
"epoch": 0.35,
"grad_norm": 0.8816009759902954,
"learning_rate": 7.518931432872614e-06,
"loss": 0.6309,
"step": 5556
},
{
"epoch": 0.35,
"grad_norm": 0.8535116910934448,
"learning_rate": 7.518045097442111e-06,
"loss": 0.6286,
"step": 5557
},
{
"epoch": 0.35,
"grad_norm": 0.9062272310256958,
"learning_rate": 7.517158655985483e-06,
"loss": 0.6187,
"step": 5558
},
{
"epoch": 0.35,
"grad_norm": 0.9796926975250244,
"learning_rate": 7.516272108540066e-06,
"loss": 0.6543,
"step": 5559
},
{
"epoch": 0.35,
"grad_norm": 0.9051242470741272,
"learning_rate": 7.515385455143183e-06,
"loss": 0.6491,
"step": 5560
},
{
"epoch": 0.35,
"grad_norm": 0.935102105140686,
"learning_rate": 7.514498695832169e-06,
"loss": 0.6305,
"step": 5561
},
{
"epoch": 0.35,
"grad_norm": 0.8482328653335571,
"learning_rate": 7.51361183064436e-06,
"loss": 0.6013,
"step": 5562
},
{
"epoch": 0.35,
"grad_norm": 0.9039483070373535,
"learning_rate": 7.512724859617103e-06,
"loss": 0.591,
"step": 5563
},
{
"epoch": 0.35,
"grad_norm": 0.924065113067627,
"learning_rate": 7.511837782787743e-06,
"loss": 0.5901,
"step": 5564
},
{
"epoch": 0.35,
"grad_norm": 0.8983739614486694,
"learning_rate": 7.510950600193632e-06,
"loss": 0.5847,
"step": 5565
},
{
"epoch": 0.35,
"grad_norm": 0.8916130065917969,
"learning_rate": 7.510063311872125e-06,
"loss": 0.5815,
"step": 5566
},
{
"epoch": 0.35,
"grad_norm": 0.9395748972892761,
"learning_rate": 7.509175917860586e-06,
"loss": 0.6353,
"step": 5567
},
{
"epoch": 0.35,
"grad_norm": 0.8860333561897278,
"learning_rate": 7.508288418196377e-06,
"loss": 0.6206,
"step": 5568
},
{
"epoch": 0.35,
"grad_norm": 0.8644207715988159,
"learning_rate": 7.507400812916868e-06,
"loss": 0.5608,
"step": 5569
},
{
"epoch": 0.35,
"grad_norm": 0.8900479674339294,
"learning_rate": 7.5065131020594316e-06,
"loss": 0.6308,
"step": 5570
},
{
"epoch": 0.35,
"grad_norm": 0.862021267414093,
"learning_rate": 7.5056252856614505e-06,
"loss": 0.5858,
"step": 5571
},
{
"epoch": 0.35,
"grad_norm": 0.90825355052948,
"learning_rate": 7.504737363760306e-06,
"loss": 0.6993,
"step": 5572
},
{
"epoch": 0.35,
"grad_norm": 0.9253191351890564,
"learning_rate": 7.503849336393382e-06,
"loss": 0.6081,
"step": 5573
},
{
"epoch": 0.35,
"grad_norm": 0.9334720969200134,
"learning_rate": 7.502961203598074e-06,
"loss": 0.6203,
"step": 5574
},
{
"epoch": 0.35,
"grad_norm": 0.861369252204895,
"learning_rate": 7.502072965411776e-06,
"loss": 0.5873,
"step": 5575
},
{
"epoch": 0.35,
"grad_norm": 0.941525399684906,
"learning_rate": 7.501184621871891e-06,
"loss": 0.5849,
"step": 5576
},
{
"epoch": 0.35,
"grad_norm": 0.9132643342018127,
"learning_rate": 7.5002961730158204e-06,
"loss": 0.5786,
"step": 5577
},
{
"epoch": 0.35,
"grad_norm": 0.8970744013786316,
"learning_rate": 7.499407618880979e-06,
"loss": 0.6126,
"step": 5578
},
{
"epoch": 0.35,
"grad_norm": 0.8514313697814941,
"learning_rate": 7.498518959504775e-06,
"loss": 0.6322,
"step": 5579
},
{
"epoch": 0.35,
"grad_norm": 0.8997253775596619,
"learning_rate": 7.49763019492463e-06,
"loss": 0.6461,
"step": 5580
},
{
"epoch": 0.35,
"grad_norm": 0.8769670128822327,
"learning_rate": 7.4967413251779655e-06,
"loss": 0.6362,
"step": 5581
},
{
"epoch": 0.35,
"grad_norm": 0.928396999835968,
"learning_rate": 7.495852350302209e-06,
"loss": 0.6229,
"step": 5582
},
{
"epoch": 0.35,
"grad_norm": 0.8975219130516052,
"learning_rate": 7.494963270334794e-06,
"loss": 0.6457,
"step": 5583
},
{
"epoch": 0.35,
"grad_norm": 0.8608077764511108,
"learning_rate": 7.494074085313155e-06,
"loss": 0.5369,
"step": 5584
},
{
"epoch": 0.35,
"grad_norm": 0.917822003364563,
"learning_rate": 7.493184795274731e-06,
"loss": 0.6064,
"step": 5585
},
{
"epoch": 0.35,
"grad_norm": 0.9204185605049133,
"learning_rate": 7.49229540025697e-06,
"loss": 0.7078,
"step": 5586
},
{
"epoch": 0.35,
"grad_norm": 0.8705748915672302,
"learning_rate": 7.4914059002973185e-06,
"loss": 0.6384,
"step": 5587
},
{
"epoch": 0.35,
"grad_norm": 0.8483352661132812,
"learning_rate": 7.490516295433232e-06,
"loss": 0.5437,
"step": 5588
},
{
"epoch": 0.35,
"grad_norm": 0.8893619179725647,
"learning_rate": 7.489626585702169e-06,
"loss": 0.5999,
"step": 5589
},
{
"epoch": 0.35,
"grad_norm": 0.8645599484443665,
"learning_rate": 7.4887367711415905e-06,
"loss": 0.6121,
"step": 5590
},
{
"epoch": 0.35,
"grad_norm": 0.8719490766525269,
"learning_rate": 7.487846851788965e-06,
"loss": 0.6051,
"step": 5591
},
{
"epoch": 0.35,
"grad_norm": 0.9109401702880859,
"learning_rate": 7.486956827681761e-06,
"loss": 0.616,
"step": 5592
},
{
"epoch": 0.35,
"grad_norm": 0.9400895237922668,
"learning_rate": 7.4860666988574585e-06,
"loss": 0.6035,
"step": 5593
},
{
"epoch": 0.35,
"grad_norm": 0.8858636021614075,
"learning_rate": 7.485176465353534e-06,
"loss": 0.5885,
"step": 5594
},
{
"epoch": 0.35,
"grad_norm": 0.7887114882469177,
"learning_rate": 7.484286127207476e-06,
"loss": 0.5434,
"step": 5595
},
{
"epoch": 0.35,
"grad_norm": 0.8736209869384766,
"learning_rate": 7.48339568445677e-06,
"loss": 0.6051,
"step": 5596
},
{
"epoch": 0.35,
"grad_norm": 0.8536117672920227,
"learning_rate": 7.482505137138911e-06,
"loss": 0.6083,
"step": 5597
},
{
"epoch": 0.35,
"grad_norm": 0.9169653654098511,
"learning_rate": 7.4816144852913975e-06,
"loss": 0.6361,
"step": 5598
},
{
"epoch": 0.35,
"grad_norm": 0.9062714576721191,
"learning_rate": 7.480723728951731e-06,
"loss": 0.6284,
"step": 5599
},
{
"epoch": 0.35,
"grad_norm": 0.8766511678695679,
"learning_rate": 7.479832868157416e-06,
"loss": 0.6035,
"step": 5600
},
{
"epoch": 0.35,
"grad_norm": 0.8506543636322021,
"learning_rate": 7.4789419029459675e-06,
"loss": 0.5387,
"step": 5601
},
{
"epoch": 0.35,
"grad_norm": 0.8686463236808777,
"learning_rate": 7.478050833354897e-06,
"loss": 0.6335,
"step": 5602
},
{
"epoch": 0.35,
"grad_norm": 0.8849419951438904,
"learning_rate": 7.47715965942173e-06,
"loss": 0.5983,
"step": 5603
},
{
"epoch": 0.36,
"grad_norm": 0.8478937745094299,
"learning_rate": 7.476268381183984e-06,
"loss": 0.5266,
"step": 5604
},
{
"epoch": 0.36,
"grad_norm": 0.9055560827255249,
"learning_rate": 7.475376998679193e-06,
"loss": 0.6483,
"step": 5605
},
{
"epoch": 0.36,
"grad_norm": 0.9069551825523376,
"learning_rate": 7.474485511944887e-06,
"loss": 0.6147,
"step": 5606
},
{
"epoch": 0.36,
"grad_norm": 0.8674218058586121,
"learning_rate": 7.4735939210186036e-06,
"loss": 0.5723,
"step": 5607
},
{
"epoch": 0.36,
"grad_norm": 0.8519677519798279,
"learning_rate": 7.472702225937884e-06,
"loss": 0.5941,
"step": 5608
},
{
"epoch": 0.36,
"grad_norm": 0.9071281552314758,
"learning_rate": 7.471810426740278e-06,
"loss": 0.5995,
"step": 5609
},
{
"epoch": 0.36,
"grad_norm": 0.8679485321044922,
"learning_rate": 7.470918523463333e-06,
"loss": 0.5833,
"step": 5610
},
{
"epoch": 0.36,
"grad_norm": 0.8723646402359009,
"learning_rate": 7.470026516144604e-06,
"loss": 0.6437,
"step": 5611
},
{
"epoch": 0.36,
"grad_norm": 0.8494699001312256,
"learning_rate": 7.469134404821652e-06,
"loss": 0.5599,
"step": 5612
},
{
"epoch": 0.36,
"grad_norm": 0.9291670322418213,
"learning_rate": 7.468242189532039e-06,
"loss": 0.5898,
"step": 5613
},
{
"epoch": 0.36,
"grad_norm": 0.9132770895957947,
"learning_rate": 7.467349870313334e-06,
"loss": 0.5415,
"step": 5614
},
{
"epoch": 0.36,
"grad_norm": 0.8860681653022766,
"learning_rate": 7.466457447203109e-06,
"loss": 0.5958,
"step": 5615
},
{
"epoch": 0.36,
"grad_norm": 0.8410258293151855,
"learning_rate": 7.465564920238941e-06,
"loss": 0.5857,
"step": 5616
},
{
"epoch": 0.36,
"grad_norm": 0.8339051008224487,
"learning_rate": 7.464672289458411e-06,
"loss": 0.5913,
"step": 5617
},
{
"epoch": 0.36,
"grad_norm": 0.9605398774147034,
"learning_rate": 7.463779554899107e-06,
"loss": 0.6138,
"step": 5618
},
{
"epoch": 0.36,
"grad_norm": 0.8939738273620605,
"learning_rate": 7.462886716598614e-06,
"loss": 0.6042,
"step": 5619
},
{
"epoch": 0.36,
"grad_norm": 0.842354953289032,
"learning_rate": 7.46199377459453e-06,
"loss": 0.6018,
"step": 5620
},
{
"epoch": 0.36,
"grad_norm": 0.9019548892974854,
"learning_rate": 7.461100728924455e-06,
"loss": 0.556,
"step": 5621
},
{
"epoch": 0.36,
"grad_norm": 0.8871820569038391,
"learning_rate": 7.460207579625988e-06,
"loss": 0.5807,
"step": 5622
},
{
"epoch": 0.36,
"grad_norm": 0.8736592531204224,
"learning_rate": 7.459314326736738e-06,
"loss": 0.5672,
"step": 5623
},
{
"epoch": 0.36,
"grad_norm": 0.9413090348243713,
"learning_rate": 7.458420970294317e-06,
"loss": 0.6163,
"step": 5624
},
{
"epoch": 0.36,
"grad_norm": 0.8506051301956177,
"learning_rate": 7.457527510336342e-06,
"loss": 0.5363,
"step": 5625
},
{
"epoch": 0.36,
"grad_norm": 0.869382381439209,
"learning_rate": 7.456633946900432e-06,
"loss": 0.6099,
"step": 5626
},
{
"epoch": 0.36,
"grad_norm": 0.8384730219841003,
"learning_rate": 7.455740280024212e-06,
"loss": 0.5834,
"step": 5627
},
{
"epoch": 0.36,
"grad_norm": 0.8252652883529663,
"learning_rate": 7.454846509745311e-06,
"loss": 0.5918,
"step": 5628
},
{
"epoch": 0.36,
"grad_norm": 0.9574599862098694,
"learning_rate": 7.453952636101366e-06,
"loss": 0.6747,
"step": 5629
},
{
"epoch": 0.36,
"grad_norm": 0.8753709197044373,
"learning_rate": 7.45305865913001e-06,
"loss": 0.6559,
"step": 5630
},
{
"epoch": 0.36,
"grad_norm": 0.9628907442092896,
"learning_rate": 7.452164578868889e-06,
"loss": 0.6476,
"step": 5631
},
{
"epoch": 0.36,
"grad_norm": 0.9094507098197937,
"learning_rate": 7.451270395355647e-06,
"loss": 0.6579,
"step": 5632
},
{
"epoch": 0.36,
"grad_norm": 0.7960030436515808,
"learning_rate": 7.450376108627937e-06,
"loss": 0.5376,
"step": 5633
},
{
"epoch": 0.36,
"grad_norm": 0.8576752543449402,
"learning_rate": 7.449481718723411e-06,
"loss": 0.616,
"step": 5634
},
{
"epoch": 0.36,
"grad_norm": 0.8710610866546631,
"learning_rate": 7.448587225679733e-06,
"loss": 0.6292,
"step": 5635
},
{
"epoch": 0.36,
"grad_norm": 0.9258856177330017,
"learning_rate": 7.447692629534565e-06,
"loss": 0.5753,
"step": 5636
},
{
"epoch": 0.36,
"grad_norm": 0.911663830280304,
"learning_rate": 7.446797930325574e-06,
"loss": 0.6113,
"step": 5637
},
{
"epoch": 0.36,
"grad_norm": 0.8927462697029114,
"learning_rate": 7.445903128090435e-06,
"loss": 0.5843,
"step": 5638
},
{
"epoch": 0.36,
"grad_norm": 0.9059770703315735,
"learning_rate": 7.445008222866823e-06,
"loss": 0.5649,
"step": 5639
},
{
"epoch": 0.36,
"grad_norm": 0.8566960692405701,
"learning_rate": 7.444113214692422e-06,
"loss": 0.5713,
"step": 5640
},
{
"epoch": 0.36,
"grad_norm": 0.9214499592781067,
"learning_rate": 7.443218103604915e-06,
"loss": 0.5919,
"step": 5641
},
{
"epoch": 0.36,
"grad_norm": 0.9560672640800476,
"learning_rate": 7.442322889641992e-06,
"loss": 0.6563,
"step": 5642
},
{
"epoch": 0.36,
"grad_norm": 0.9713243246078491,
"learning_rate": 7.441427572841349e-06,
"loss": 0.6801,
"step": 5643
},
{
"epoch": 0.36,
"grad_norm": 0.8504186272621155,
"learning_rate": 7.440532153240685e-06,
"loss": 0.5809,
"step": 5644
},
{
"epoch": 0.36,
"grad_norm": 0.8800424337387085,
"learning_rate": 7.4396366308777015e-06,
"loss": 0.6323,
"step": 5645
},
{
"epoch": 0.36,
"grad_norm": 0.8435956239700317,
"learning_rate": 7.4387410057901056e-06,
"loss": 0.5616,
"step": 5646
},
{
"epoch": 0.36,
"grad_norm": 0.8319722414016724,
"learning_rate": 7.4378452780156094e-06,
"loss": 0.5398,
"step": 5647
},
{
"epoch": 0.36,
"grad_norm": 0.9279896020889282,
"learning_rate": 7.436949447591931e-06,
"loss": 0.5779,
"step": 5648
},
{
"epoch": 0.36,
"grad_norm": 0.8527793884277344,
"learning_rate": 7.4360535145567865e-06,
"loss": 0.5943,
"step": 5649
},
{
"epoch": 0.36,
"grad_norm": 0.8525310754776001,
"learning_rate": 7.435157478947905e-06,
"loss": 0.5427,
"step": 5650
},
{
"epoch": 0.36,
"grad_norm": 0.8320702910423279,
"learning_rate": 7.434261340803013e-06,
"loss": 0.5816,
"step": 5651
},
{
"epoch": 0.36,
"grad_norm": 0.8606296181678772,
"learning_rate": 7.433365100159844e-06,
"loss": 0.582,
"step": 5652
},
{
"epoch": 0.36,
"grad_norm": 0.9004180431365967,
"learning_rate": 7.432468757056136e-06,
"loss": 0.581,
"step": 5653
},
{
"epoch": 0.36,
"grad_norm": 0.858650267124176,
"learning_rate": 7.431572311529629e-06,
"loss": 0.6042,
"step": 5654
},
{
"epoch": 0.36,
"grad_norm": 0.9310391545295715,
"learning_rate": 7.4306757636180725e-06,
"loss": 0.5514,
"step": 5655
},
{
"epoch": 0.36,
"grad_norm": 0.8439887762069702,
"learning_rate": 7.429779113359214e-06,
"loss": 0.5636,
"step": 5656
},
{
"epoch": 0.36,
"grad_norm": 0.890603244304657,
"learning_rate": 7.428882360790811e-06,
"loss": 0.572,
"step": 5657
},
{
"epoch": 0.36,
"grad_norm": 0.9427062273025513,
"learning_rate": 7.427985505950619e-06,
"loss": 0.5997,
"step": 5658
},
{
"epoch": 0.36,
"grad_norm": 0.9088672399520874,
"learning_rate": 7.427088548876406e-06,
"loss": 0.6665,
"step": 5659
},
{
"epoch": 0.36,
"grad_norm": 0.9224042892456055,
"learning_rate": 7.426191489605936e-06,
"loss": 0.596,
"step": 5660
},
{
"epoch": 0.36,
"grad_norm": 0.8789502382278442,
"learning_rate": 7.425294328176984e-06,
"loss": 0.567,
"step": 5661
},
{
"epoch": 0.36,
"grad_norm": 0.8930298686027527,
"learning_rate": 7.4243970646273236e-06,
"loss": 0.5902,
"step": 5662
},
{
"epoch": 0.36,
"grad_norm": 0.8686020970344543,
"learning_rate": 7.423499698994737e-06,
"loss": 0.5841,
"step": 5663
},
{
"epoch": 0.36,
"grad_norm": 0.8149586319923401,
"learning_rate": 7.422602231317009e-06,
"loss": 0.5857,
"step": 5664
},
{
"epoch": 0.36,
"grad_norm": 0.9393472075462341,
"learning_rate": 7.421704661631929e-06,
"loss": 0.6012,
"step": 5665
},
{
"epoch": 0.36,
"grad_norm": 0.8844897150993347,
"learning_rate": 7.42080698997729e-06,
"loss": 0.6458,
"step": 5666
},
{
"epoch": 0.36,
"grad_norm": 0.8492723107337952,
"learning_rate": 7.419909216390889e-06,
"loss": 0.6077,
"step": 5667
},
{
"epoch": 0.36,
"grad_norm": 0.8630576133728027,
"learning_rate": 7.4190113409105304e-06,
"loss": 0.5597,
"step": 5668
},
{
"epoch": 0.36,
"grad_norm": 0.8691625595092773,
"learning_rate": 7.418113363574018e-06,
"loss": 0.5845,
"step": 5669
},
{
"epoch": 0.36,
"grad_norm": 0.9205952286720276,
"learning_rate": 7.417215284419165e-06,
"loss": 0.6316,
"step": 5670
},
{
"epoch": 0.36,
"grad_norm": 0.9179142713546753,
"learning_rate": 7.416317103483784e-06,
"loss": 0.6545,
"step": 5671
},
{
"epoch": 0.36,
"grad_norm": 0.9906255602836609,
"learning_rate": 7.415418820805698e-06,
"loss": 0.5923,
"step": 5672
},
{
"epoch": 0.36,
"grad_norm": 0.8854468464851379,
"learning_rate": 7.414520436422725e-06,
"loss": 0.6767,
"step": 5673
},
{
"epoch": 0.36,
"grad_norm": 0.8236328959465027,
"learning_rate": 7.413621950372698e-06,
"loss": 0.5705,
"step": 5674
},
{
"epoch": 0.36,
"grad_norm": 0.8900964856147766,
"learning_rate": 7.4127233626934456e-06,
"loss": 0.615,
"step": 5675
},
{
"epoch": 0.36,
"grad_norm": 0.9543713927268982,
"learning_rate": 7.411824673422808e-06,
"loss": 0.6227,
"step": 5676
},
{
"epoch": 0.36,
"grad_norm": 0.8835585713386536,
"learning_rate": 7.410925882598621e-06,
"loss": 0.6486,
"step": 5677
},
{
"epoch": 0.36,
"grad_norm": 0.8607789874076843,
"learning_rate": 7.410026990258734e-06,
"loss": 0.595,
"step": 5678
},
{
"epoch": 0.36,
"grad_norm": 0.8899136781692505,
"learning_rate": 7.409127996440993e-06,
"loss": 0.5775,
"step": 5679
},
{
"epoch": 0.36,
"grad_norm": 0.9142584204673767,
"learning_rate": 7.408228901183254e-06,
"loss": 0.6192,
"step": 5680
},
{
"epoch": 0.36,
"grad_norm": 0.9016997218132019,
"learning_rate": 7.407329704523372e-06,
"loss": 0.6105,
"step": 5681
},
{
"epoch": 0.36,
"grad_norm": 0.9058455228805542,
"learning_rate": 7.406430406499212e-06,
"loss": 0.6091,
"step": 5682
},
{
"epoch": 0.36,
"grad_norm": 0.8867766261100769,
"learning_rate": 7.405531007148638e-06,
"loss": 0.6034,
"step": 5683
},
{
"epoch": 0.36,
"grad_norm": 0.8825892210006714,
"learning_rate": 7.404631506509523e-06,
"loss": 0.5718,
"step": 5684
},
{
"epoch": 0.36,
"grad_norm": 0.9403483867645264,
"learning_rate": 7.403731904619739e-06,
"loss": 0.6664,
"step": 5685
},
{
"epoch": 0.36,
"grad_norm": 0.8549020886421204,
"learning_rate": 7.402832201517166e-06,
"loss": 0.6042,
"step": 5686
},
{
"epoch": 0.36,
"grad_norm": 0.9207231402397156,
"learning_rate": 7.40193239723969e-06,
"loss": 0.6183,
"step": 5687
},
{
"epoch": 0.36,
"grad_norm": 0.8826068639755249,
"learning_rate": 7.401032491825194e-06,
"loss": 0.6157,
"step": 5688
},
{
"epoch": 0.36,
"grad_norm": 0.9273738265037537,
"learning_rate": 7.400132485311573e-06,
"loss": 0.6135,
"step": 5689
},
{
"epoch": 0.36,
"grad_norm": 0.9432485699653625,
"learning_rate": 7.399232377736722e-06,
"loss": 0.641,
"step": 5690
},
{
"epoch": 0.36,
"grad_norm": 0.957802951335907,
"learning_rate": 7.398332169138544e-06,
"loss": 0.614,
"step": 5691
},
{
"epoch": 0.36,
"grad_norm": 0.9042625427246094,
"learning_rate": 7.397431859554941e-06,
"loss": 0.6075,
"step": 5692
},
{
"epoch": 0.36,
"grad_norm": 0.8862330317497253,
"learning_rate": 7.396531449023821e-06,
"loss": 0.5823,
"step": 5693
},
{
"epoch": 0.36,
"grad_norm": 0.8898954391479492,
"learning_rate": 7.395630937583099e-06,
"loss": 0.6319,
"step": 5694
},
{
"epoch": 0.36,
"grad_norm": 0.8645822405815125,
"learning_rate": 7.394730325270693e-06,
"loss": 0.6029,
"step": 5695
},
{
"epoch": 0.36,
"grad_norm": 0.9035110473632812,
"learning_rate": 7.393829612124524e-06,
"loss": 0.6147,
"step": 5696
},
{
"epoch": 0.36,
"grad_norm": 0.8781278133392334,
"learning_rate": 7.392928798182516e-06,
"loss": 0.5922,
"step": 5697
},
{
"epoch": 0.36,
"grad_norm": 0.8469416499137878,
"learning_rate": 7.392027883482602e-06,
"loss": 0.6564,
"step": 5698
},
{
"epoch": 0.36,
"grad_norm": 0.8922897577285767,
"learning_rate": 7.391126868062714e-06,
"loss": 0.5904,
"step": 5699
},
{
"epoch": 0.36,
"grad_norm": 0.8153558969497681,
"learning_rate": 7.390225751960792e-06,
"loss": 0.5945,
"step": 5700
},
{
"epoch": 0.36,
"grad_norm": 0.8306947350502014,
"learning_rate": 7.389324535214779e-06,
"loss": 0.623,
"step": 5701
},
{
"epoch": 0.36,
"grad_norm": 0.9121823906898499,
"learning_rate": 7.388423217862621e-06,
"loss": 0.6231,
"step": 5702
},
{
"epoch": 0.36,
"grad_norm": 0.9361130595207214,
"learning_rate": 7.387521799942271e-06,
"loss": 0.6028,
"step": 5703
},
{
"epoch": 0.36,
"grad_norm": 0.8886232972145081,
"learning_rate": 7.386620281491683e-06,
"loss": 0.6612,
"step": 5704
},
{
"epoch": 0.36,
"grad_norm": 0.8650026917457581,
"learning_rate": 7.385718662548817e-06,
"loss": 0.6552,
"step": 5705
},
{
"epoch": 0.36,
"grad_norm": 0.9328054785728455,
"learning_rate": 7.384816943151638e-06,
"loss": 0.604,
"step": 5706
},
{
"epoch": 0.36,
"grad_norm": 0.8479319214820862,
"learning_rate": 7.383915123338113e-06,
"loss": 0.5936,
"step": 5707
},
{
"epoch": 0.36,
"grad_norm": 0.8170728087425232,
"learning_rate": 7.3830132031462165e-06,
"loss": 0.6188,
"step": 5708
},
{
"epoch": 0.36,
"grad_norm": 0.8698776364326477,
"learning_rate": 7.382111182613923e-06,
"loss": 0.5548,
"step": 5709
},
{
"epoch": 0.36,
"grad_norm": 0.8348639607429504,
"learning_rate": 7.381209061779214e-06,
"loss": 0.635,
"step": 5710
},
{
"epoch": 0.36,
"grad_norm": 0.9122574329376221,
"learning_rate": 7.380306840680076e-06,
"loss": 0.6264,
"step": 5711
},
{
"epoch": 0.36,
"grad_norm": 0.8634544014930725,
"learning_rate": 7.379404519354496e-06,
"loss": 0.6421,
"step": 5712
},
{
"epoch": 0.36,
"grad_norm": 0.830940306186676,
"learning_rate": 7.378502097840471e-06,
"loss": 0.599,
"step": 5713
},
{
"epoch": 0.36,
"grad_norm": 0.9275731444358826,
"learning_rate": 7.377599576175995e-06,
"loss": 0.6653,
"step": 5714
},
{
"epoch": 0.36,
"grad_norm": 0.9509021639823914,
"learning_rate": 7.376696954399073e-06,
"loss": 0.6107,
"step": 5715
},
{
"epoch": 0.36,
"grad_norm": 0.8291517496109009,
"learning_rate": 7.37579423254771e-06,
"loss": 0.6103,
"step": 5716
},
{
"epoch": 0.36,
"grad_norm": 0.864422082901001,
"learning_rate": 7.374891410659917e-06,
"loss": 0.6241,
"step": 5717
},
{
"epoch": 0.36,
"grad_norm": 0.8923708200454712,
"learning_rate": 7.373988488773708e-06,
"loss": 0.6212,
"step": 5718
},
{
"epoch": 0.36,
"grad_norm": 0.8855364918708801,
"learning_rate": 7.3730854669271015e-06,
"loss": 0.5703,
"step": 5719
},
{
"epoch": 0.36,
"grad_norm": 0.8736538290977478,
"learning_rate": 7.372182345158122e-06,
"loss": 0.6469,
"step": 5720
},
{
"epoch": 0.36,
"grad_norm": 0.9270285964012146,
"learning_rate": 7.3712791235047976e-06,
"loss": 0.6194,
"step": 5721
},
{
"epoch": 0.36,
"grad_norm": 0.9518702030181885,
"learning_rate": 7.370375802005157e-06,
"loss": 0.6458,
"step": 5722
},
{
"epoch": 0.36,
"grad_norm": 0.948585569858551,
"learning_rate": 7.369472380697236e-06,
"loss": 0.5592,
"step": 5723
},
{
"epoch": 0.36,
"grad_norm": 0.8145323991775513,
"learning_rate": 7.368568859619078e-06,
"loss": 0.5643,
"step": 5724
},
{
"epoch": 0.36,
"grad_norm": 0.88991379737854,
"learning_rate": 7.3676652388087234e-06,
"loss": 0.6035,
"step": 5725
},
{
"epoch": 0.36,
"grad_norm": 0.9013904333114624,
"learning_rate": 7.366761518304223e-06,
"loss": 0.598,
"step": 5726
},
{
"epoch": 0.36,
"grad_norm": 0.9081125259399414,
"learning_rate": 7.365857698143628e-06,
"loss": 0.5988,
"step": 5727
},
{
"epoch": 0.36,
"grad_norm": 0.9681587219238281,
"learning_rate": 7.364953778364996e-06,
"loss": 0.6415,
"step": 5728
},
{
"epoch": 0.36,
"grad_norm": 0.8465878367424011,
"learning_rate": 7.364049759006387e-06,
"loss": 0.6199,
"step": 5729
},
{
"epoch": 0.36,
"grad_norm": 0.8657549619674683,
"learning_rate": 7.363145640105867e-06,
"loss": 0.5782,
"step": 5730
},
{
"epoch": 0.36,
"grad_norm": 0.8750969171524048,
"learning_rate": 7.362241421701505e-06,
"loss": 0.5967,
"step": 5731
},
{
"epoch": 0.36,
"grad_norm": 0.9876574277877808,
"learning_rate": 7.3613371038313744e-06,
"loss": 0.6121,
"step": 5732
},
{
"epoch": 0.36,
"grad_norm": 0.906506359577179,
"learning_rate": 7.360432686533552e-06,
"loss": 0.6573,
"step": 5733
},
{
"epoch": 0.36,
"grad_norm": 0.8797792196273804,
"learning_rate": 7.359528169846121e-06,
"loss": 0.6132,
"step": 5734
},
{
"epoch": 0.36,
"grad_norm": 0.7947115898132324,
"learning_rate": 7.358623553807167e-06,
"loss": 0.5339,
"step": 5735
},
{
"epoch": 0.36,
"grad_norm": 0.8234474658966064,
"learning_rate": 7.35771883845478e-06,
"loss": 0.5606,
"step": 5736
},
{
"epoch": 0.36,
"grad_norm": 0.827809751033783,
"learning_rate": 7.356814023827055e-06,
"loss": 0.5549,
"step": 5737
},
{
"epoch": 0.36,
"grad_norm": 0.9173133373260498,
"learning_rate": 7.35590910996209e-06,
"loss": 0.608,
"step": 5738
},
{
"epoch": 0.36,
"grad_norm": 0.8398633599281311,
"learning_rate": 7.355004096897987e-06,
"loss": 0.5656,
"step": 5739
},
{
"epoch": 0.36,
"grad_norm": 0.8507029414176941,
"learning_rate": 7.354098984672856e-06,
"loss": 0.5315,
"step": 5740
},
{
"epoch": 0.36,
"grad_norm": 0.9494758248329163,
"learning_rate": 7.353193773324805e-06,
"loss": 0.6437,
"step": 5741
},
{
"epoch": 0.36,
"grad_norm": 0.8865925669670105,
"learning_rate": 7.35228846289195e-06,
"loss": 0.5627,
"step": 5742
},
{
"epoch": 0.36,
"grad_norm": 0.9043111205101013,
"learning_rate": 7.351383053412411e-06,
"loss": 0.6526,
"step": 5743
},
{
"epoch": 0.36,
"grad_norm": 0.8272423148155212,
"learning_rate": 7.350477544924313e-06,
"loss": 0.6375,
"step": 5744
},
{
"epoch": 0.36,
"grad_norm": 0.8952882885932922,
"learning_rate": 7.349571937465782e-06,
"loss": 0.6383,
"step": 5745
},
{
"epoch": 0.36,
"grad_norm": 0.9154927730560303,
"learning_rate": 7.348666231074948e-06,
"loss": 0.5916,
"step": 5746
},
{
"epoch": 0.36,
"grad_norm": 0.8953961730003357,
"learning_rate": 7.3477604257899515e-06,
"loss": 0.6092,
"step": 5747
},
{
"epoch": 0.36,
"grad_norm": 0.9233314990997314,
"learning_rate": 7.346854521648929e-06,
"loss": 0.6414,
"step": 5748
},
{
"epoch": 0.36,
"grad_norm": 0.8458792567253113,
"learning_rate": 7.345948518690029e-06,
"loss": 0.556,
"step": 5749
},
{
"epoch": 0.36,
"grad_norm": 0.9279628396034241,
"learning_rate": 7.345042416951395e-06,
"loss": 0.6147,
"step": 5750
},
{
"epoch": 0.36,
"grad_norm": 0.8384361267089844,
"learning_rate": 7.344136216471185e-06,
"loss": 0.5691,
"step": 5751
},
{
"epoch": 0.36,
"grad_norm": 0.8720436096191406,
"learning_rate": 7.343229917287552e-06,
"loss": 0.5882,
"step": 5752
},
{
"epoch": 0.36,
"grad_norm": 0.9607126712799072,
"learning_rate": 7.34232351943866e-06,
"loss": 0.5573,
"step": 5753
},
{
"epoch": 0.36,
"grad_norm": 0.8432719707489014,
"learning_rate": 7.341417022962671e-06,
"loss": 0.54,
"step": 5754
},
{
"epoch": 0.36,
"grad_norm": 0.9096271395683289,
"learning_rate": 7.340510427897759e-06,
"loss": 0.5467,
"step": 5755
},
{
"epoch": 0.36,
"grad_norm": 0.9777395725250244,
"learning_rate": 7.339603734282093e-06,
"loss": 0.6271,
"step": 5756
},
{
"epoch": 0.36,
"grad_norm": 0.8779467344284058,
"learning_rate": 7.338696942153855e-06,
"loss": 0.6058,
"step": 5757
},
{
"epoch": 0.36,
"grad_norm": 0.8599120378494263,
"learning_rate": 7.337790051551221e-06,
"loss": 0.5776,
"step": 5758
},
{
"epoch": 0.36,
"grad_norm": 0.8779652118682861,
"learning_rate": 7.3368830625123835e-06,
"loss": 0.6134,
"step": 5759
},
{
"epoch": 0.36,
"grad_norm": 0.903643012046814,
"learning_rate": 7.335975975075529e-06,
"loss": 0.6908,
"step": 5760
},
{
"epoch": 0.36,
"grad_norm": 0.895206093788147,
"learning_rate": 7.3350687892788505e-06,
"loss": 0.5559,
"step": 5761
},
{
"epoch": 0.37,
"grad_norm": 0.907238781452179,
"learning_rate": 7.33416150516055e-06,
"loss": 0.5807,
"step": 5762
},
{
"epoch": 0.37,
"grad_norm": 0.877465546131134,
"learning_rate": 7.333254122758828e-06,
"loss": 0.6332,
"step": 5763
},
{
"epoch": 0.37,
"grad_norm": 0.8640191555023193,
"learning_rate": 7.332346642111893e-06,
"loss": 0.6103,
"step": 5764
},
{
"epoch": 0.37,
"grad_norm": 0.8886452317237854,
"learning_rate": 7.331439063257953e-06,
"loss": 0.6206,
"step": 5765
},
{
"epoch": 0.37,
"grad_norm": 0.8364898562431335,
"learning_rate": 7.330531386235225e-06,
"loss": 0.5864,
"step": 5766
},
{
"epoch": 0.37,
"grad_norm": 0.9376548528671265,
"learning_rate": 7.329623611081927e-06,
"loss": 0.6066,
"step": 5767
},
{
"epoch": 0.37,
"grad_norm": 0.8801112174987793,
"learning_rate": 7.3287157378362846e-06,
"loss": 0.5709,
"step": 5768
},
{
"epoch": 0.37,
"grad_norm": 0.8653738498687744,
"learning_rate": 7.327807766536521e-06,
"loss": 0.6407,
"step": 5769
},
{
"epoch": 0.37,
"grad_norm": 0.8694636821746826,
"learning_rate": 7.3268996972208725e-06,
"loss": 0.6672,
"step": 5770
},
{
"epoch": 0.37,
"grad_norm": 0.8904354572296143,
"learning_rate": 7.325991529927572e-06,
"loss": 0.6163,
"step": 5771
},
{
"epoch": 0.37,
"grad_norm": 0.9113852977752686,
"learning_rate": 7.325083264694859e-06,
"loss": 0.5539,
"step": 5772
},
{
"epoch": 0.37,
"grad_norm": 0.8761439919471741,
"learning_rate": 7.324174901560978e-06,
"loss": 0.6205,
"step": 5773
},
{
"epoch": 0.37,
"grad_norm": 0.8492023944854736,
"learning_rate": 7.323266440564177e-06,
"loss": 0.5814,
"step": 5774
},
{
"epoch": 0.37,
"grad_norm": 0.8769062757492065,
"learning_rate": 7.32235788174271e-06,
"loss": 0.5551,
"step": 5775
},
{
"epoch": 0.37,
"grad_norm": 0.8555404543876648,
"learning_rate": 7.32144922513483e-06,
"loss": 0.5793,
"step": 5776
},
{
"epoch": 0.37,
"grad_norm": 0.874083936214447,
"learning_rate": 7.320540470778799e-06,
"loss": 0.6028,
"step": 5777
},
{
"epoch": 0.37,
"grad_norm": 0.9265373945236206,
"learning_rate": 7.319631618712881e-06,
"loss": 0.6117,
"step": 5778
},
{
"epoch": 0.37,
"grad_norm": 0.8393657803535461,
"learning_rate": 7.318722668975347e-06,
"loss": 0.5443,
"step": 5779
},
{
"epoch": 0.37,
"grad_norm": 0.844636857509613,
"learning_rate": 7.317813621604466e-06,
"loss": 0.6026,
"step": 5780
},
{
"epoch": 0.37,
"grad_norm": 0.853661060333252,
"learning_rate": 7.316904476638515e-06,
"loss": 0.565,
"step": 5781
},
{
"epoch": 0.37,
"grad_norm": 0.871853768825531,
"learning_rate": 7.315995234115778e-06,
"loss": 0.5588,
"step": 5782
},
{
"epoch": 0.37,
"grad_norm": 0.814250111579895,
"learning_rate": 7.315085894074539e-06,
"loss": 0.5909,
"step": 5783
},
{
"epoch": 0.37,
"grad_norm": 0.904152512550354,
"learning_rate": 7.314176456553086e-06,
"loss": 0.5795,
"step": 5784
},
{
"epoch": 0.37,
"grad_norm": 0.8134939074516296,
"learning_rate": 7.3132669215897125e-06,
"loss": 0.6044,
"step": 5785
},
{
"epoch": 0.37,
"grad_norm": 0.8810901641845703,
"learning_rate": 7.312357289222717e-06,
"loss": 0.6512,
"step": 5786
},
{
"epoch": 0.37,
"grad_norm": 0.8648774027824402,
"learning_rate": 7.3114475594904e-06,
"loss": 0.5882,
"step": 5787
},
{
"epoch": 0.37,
"grad_norm": 0.8309141993522644,
"learning_rate": 7.310537732431067e-06,
"loss": 0.6525,
"step": 5788
},
{
"epoch": 0.37,
"grad_norm": 0.9296196699142456,
"learning_rate": 7.309627808083027e-06,
"loss": 0.642,
"step": 5789
},
{
"epoch": 0.37,
"grad_norm": 0.9500271081924438,
"learning_rate": 7.308717786484596e-06,
"loss": 0.5761,
"step": 5790
},
{
"epoch": 0.37,
"grad_norm": 0.9415786266326904,
"learning_rate": 7.30780766767409e-06,
"loss": 0.7613,
"step": 5791
},
{
"epoch": 0.37,
"grad_norm": 0.771344780921936,
"learning_rate": 7.306897451689832e-06,
"loss": 0.5429,
"step": 5792
},
{
"epoch": 0.37,
"grad_norm": 0.8383582830429077,
"learning_rate": 7.305987138570145e-06,
"loss": 0.5749,
"step": 5793
},
{
"epoch": 0.37,
"grad_norm": 0.9000876545906067,
"learning_rate": 7.305076728353364e-06,
"loss": 0.592,
"step": 5794
},
{
"epoch": 0.37,
"grad_norm": 0.841670572757721,
"learning_rate": 7.30416622107782e-06,
"loss": 0.5762,
"step": 5795
},
{
"epoch": 0.37,
"grad_norm": 0.8553557395935059,
"learning_rate": 7.303255616781853e-06,
"loss": 0.6068,
"step": 5796
},
{
"epoch": 0.37,
"grad_norm": 0.9532732367515564,
"learning_rate": 7.3023449155038016e-06,
"loss": 0.6489,
"step": 5797
},
{
"epoch": 0.37,
"grad_norm": 0.8923346996307373,
"learning_rate": 7.301434117282018e-06,
"loss": 0.6421,
"step": 5798
},
{
"epoch": 0.37,
"grad_norm": 0.8571204543113708,
"learning_rate": 7.300523222154848e-06,
"loss": 0.5909,
"step": 5799
},
{
"epoch": 0.37,
"grad_norm": 0.9138479232788086,
"learning_rate": 7.299612230160648e-06,
"loss": 0.6169,
"step": 5800
},
{
"epoch": 0.37,
"grad_norm": 0.9442511796951294,
"learning_rate": 7.298701141337778e-06,
"loss": 0.5826,
"step": 5801
},
{
"epoch": 0.37,
"grad_norm": 0.8205499053001404,
"learning_rate": 7.2977899557246e-06,
"loss": 0.6157,
"step": 5802
},
{
"epoch": 0.37,
"grad_norm": 0.8094413876533508,
"learning_rate": 7.2968786733594795e-06,
"loss": 0.5982,
"step": 5803
},
{
"epoch": 0.37,
"grad_norm": 0.8342402577400208,
"learning_rate": 7.295967294280788e-06,
"loss": 0.6123,
"step": 5804
},
{
"epoch": 0.37,
"grad_norm": 0.883686363697052,
"learning_rate": 7.2950558185269005e-06,
"loss": 0.5801,
"step": 5805
},
{
"epoch": 0.37,
"grad_norm": 0.9100261330604553,
"learning_rate": 7.294144246136198e-06,
"loss": 0.5688,
"step": 5806
},
{
"epoch": 0.37,
"grad_norm": 0.8626593351364136,
"learning_rate": 7.29323257714706e-06,
"loss": 0.6418,
"step": 5807
},
{
"epoch": 0.37,
"grad_norm": 0.8720927834510803,
"learning_rate": 7.292320811597877e-06,
"loss": 0.6389,
"step": 5808
},
{
"epoch": 0.37,
"grad_norm": 0.8386964797973633,
"learning_rate": 7.291408949527039e-06,
"loss": 0.5383,
"step": 5809
},
{
"epoch": 0.37,
"grad_norm": 0.921635091304779,
"learning_rate": 7.290496990972942e-06,
"loss": 0.5874,
"step": 5810
},
{
"epoch": 0.37,
"grad_norm": 0.8803329467773438,
"learning_rate": 7.2895849359739834e-06,
"loss": 0.5582,
"step": 5811
},
{
"epoch": 0.37,
"grad_norm": 0.9119853973388672,
"learning_rate": 7.288672784568568e-06,
"loss": 0.6075,
"step": 5812
},
{
"epoch": 0.37,
"grad_norm": 0.8550745844841003,
"learning_rate": 7.2877605367951055e-06,
"loss": 0.5818,
"step": 5813
},
{
"epoch": 0.37,
"grad_norm": 0.8705887794494629,
"learning_rate": 7.286848192692003e-06,
"loss": 0.5768,
"step": 5814
},
{
"epoch": 0.37,
"grad_norm": 0.8665969371795654,
"learning_rate": 7.28593575229768e-06,
"loss": 0.6108,
"step": 5815
},
{
"epoch": 0.37,
"grad_norm": 0.8779606819152832,
"learning_rate": 7.285023215650553e-06,
"loss": 0.5621,
"step": 5816
},
{
"epoch": 0.37,
"grad_norm": 0.8697792291641235,
"learning_rate": 7.2841105827890475e-06,
"loss": 0.6248,
"step": 5817
},
{
"epoch": 0.37,
"grad_norm": 0.9603003859519958,
"learning_rate": 7.283197853751593e-06,
"loss": 0.6527,
"step": 5818
},
{
"epoch": 0.37,
"grad_norm": 0.9190054535865784,
"learning_rate": 7.282285028576618e-06,
"loss": 0.6703,
"step": 5819
},
{
"epoch": 0.37,
"grad_norm": 0.9047878980636597,
"learning_rate": 7.28137210730256e-06,
"loss": 0.6406,
"step": 5820
},
{
"epoch": 0.37,
"grad_norm": 0.8862581849098206,
"learning_rate": 7.280459089967861e-06,
"loss": 0.5556,
"step": 5821
},
{
"epoch": 0.37,
"grad_norm": 0.8609002232551575,
"learning_rate": 7.279545976610961e-06,
"loss": 0.5763,
"step": 5822
},
{
"epoch": 0.37,
"grad_norm": 0.9380242824554443,
"learning_rate": 7.278632767270309e-06,
"loss": 0.617,
"step": 5823
},
{
"epoch": 0.37,
"grad_norm": 0.827458381652832,
"learning_rate": 7.277719461984361e-06,
"loss": 0.5788,
"step": 5824
},
{
"epoch": 0.37,
"grad_norm": 0.8551861047744751,
"learning_rate": 7.276806060791567e-06,
"loss": 0.6054,
"step": 5825
},
{
"epoch": 0.37,
"grad_norm": 0.8845090270042419,
"learning_rate": 7.275892563730393e-06,
"loss": 0.6049,
"step": 5826
},
{
"epoch": 0.37,
"grad_norm": 0.8537983894348145,
"learning_rate": 7.274978970839297e-06,
"loss": 0.5715,
"step": 5827
},
{
"epoch": 0.37,
"grad_norm": 0.8627631068229675,
"learning_rate": 7.274065282156752e-06,
"loss": 0.5343,
"step": 5828
},
{
"epoch": 0.37,
"grad_norm": 0.9428598284721375,
"learning_rate": 7.273151497721229e-06,
"loss": 0.6423,
"step": 5829
},
{
"epoch": 0.37,
"grad_norm": 0.8636415600776672,
"learning_rate": 7.272237617571205e-06,
"loss": 0.5829,
"step": 5830
},
{
"epoch": 0.37,
"grad_norm": 0.9982849359512329,
"learning_rate": 7.2713236417451584e-06,
"loss": 0.6376,
"step": 5831
},
{
"epoch": 0.37,
"grad_norm": 0.8668151497840881,
"learning_rate": 7.2704095702815754e-06,
"loss": 0.5882,
"step": 5832
},
{
"epoch": 0.37,
"grad_norm": 0.9315029382705688,
"learning_rate": 7.269495403218943e-06,
"loss": 0.5898,
"step": 5833
},
{
"epoch": 0.37,
"grad_norm": 0.8428326845169067,
"learning_rate": 7.268581140595754e-06,
"loss": 0.5528,
"step": 5834
},
{
"epoch": 0.37,
"grad_norm": 0.8342899084091187,
"learning_rate": 7.267666782450505e-06,
"loss": 0.5497,
"step": 5835
},
{
"epoch": 0.37,
"grad_norm": 0.9424355030059814,
"learning_rate": 7.266752328821698e-06,
"loss": 0.6838,
"step": 5836
},
{
"epoch": 0.37,
"grad_norm": 0.8566783666610718,
"learning_rate": 7.265837779747834e-06,
"loss": 0.5478,
"step": 5837
},
{
"epoch": 0.37,
"grad_norm": 0.982837975025177,
"learning_rate": 7.264923135267425e-06,
"loss": 0.6028,
"step": 5838
},
{
"epoch": 0.37,
"grad_norm": 0.9721706509590149,
"learning_rate": 7.264008395418981e-06,
"loss": 0.6461,
"step": 5839
},
{
"epoch": 0.37,
"grad_norm": 0.8464512825012207,
"learning_rate": 7.263093560241019e-06,
"loss": 0.5897,
"step": 5840
},
{
"epoch": 0.37,
"grad_norm": 0.8291548490524292,
"learning_rate": 7.262178629772061e-06,
"loss": 0.5641,
"step": 5841
},
{
"epoch": 0.37,
"grad_norm": 0.9384708404541016,
"learning_rate": 7.261263604050628e-06,
"loss": 0.5823,
"step": 5842
},
{
"epoch": 0.37,
"grad_norm": 0.8504778146743774,
"learning_rate": 7.260348483115254e-06,
"loss": 0.5374,
"step": 5843
},
{
"epoch": 0.37,
"grad_norm": 0.848728358745575,
"learning_rate": 7.259433267004466e-06,
"loss": 0.5656,
"step": 5844
},
{
"epoch": 0.37,
"grad_norm": 0.8592720031738281,
"learning_rate": 7.258517955756805e-06,
"loss": 0.6059,
"step": 5845
},
{
"epoch": 0.37,
"grad_norm": 0.8951132297515869,
"learning_rate": 7.257602549410808e-06,
"loss": 0.5942,
"step": 5846
},
{
"epoch": 0.37,
"grad_norm": 0.9378473162651062,
"learning_rate": 7.256687048005024e-06,
"loss": 0.6411,
"step": 5847
},
{
"epoch": 0.37,
"grad_norm": 0.9194514751434326,
"learning_rate": 7.255771451577996e-06,
"loss": 0.6337,
"step": 5848
},
{
"epoch": 0.37,
"grad_norm": 0.8979505300521851,
"learning_rate": 7.254855760168281e-06,
"loss": 0.5663,
"step": 5849
},
{
"epoch": 0.37,
"grad_norm": 0.8199179768562317,
"learning_rate": 7.2539399738144325e-06,
"loss": 0.5586,
"step": 5850
},
{
"epoch": 0.37,
"grad_norm": 0.9413596391677856,
"learning_rate": 7.2530240925550145e-06,
"loss": 0.589,
"step": 5851
},
{
"epoch": 0.37,
"grad_norm": 0.9242424964904785,
"learning_rate": 7.252108116428589e-06,
"loss": 0.5799,
"step": 5852
},
{
"epoch": 0.37,
"grad_norm": 0.9168336391448975,
"learning_rate": 7.251192045473725e-06,
"loss": 0.5839,
"step": 5853
},
{
"epoch": 0.37,
"grad_norm": 0.9121633768081665,
"learning_rate": 7.250275879728995e-06,
"loss": 0.5788,
"step": 5854
},
{
"epoch": 0.37,
"grad_norm": 0.8337844014167786,
"learning_rate": 7.249359619232976e-06,
"loss": 0.5781,
"step": 5855
},
{
"epoch": 0.37,
"grad_norm": 0.9401801228523254,
"learning_rate": 7.24844326402425e-06,
"loss": 0.6175,
"step": 5856
},
{
"epoch": 0.37,
"grad_norm": 0.8701263666152954,
"learning_rate": 7.247526814141398e-06,
"loss": 0.5735,
"step": 5857
},
{
"epoch": 0.37,
"grad_norm": 0.8712503910064697,
"learning_rate": 7.2466102696230115e-06,
"loss": 0.6065,
"step": 5858
},
{
"epoch": 0.37,
"grad_norm": 0.8291772603988647,
"learning_rate": 7.24569363050768e-06,
"loss": 0.6063,
"step": 5859
},
{
"epoch": 0.37,
"grad_norm": 0.843247652053833,
"learning_rate": 7.244776896834004e-06,
"loss": 0.5898,
"step": 5860
},
{
"epoch": 0.37,
"grad_norm": 0.8736797571182251,
"learning_rate": 7.243860068640581e-06,
"loss": 0.6175,
"step": 5861
},
{
"epoch": 0.37,
"grad_norm": 0.9105240106582642,
"learning_rate": 7.242943145966016e-06,
"loss": 0.5534,
"step": 5862
},
{
"epoch": 0.37,
"grad_norm": 0.901670515537262,
"learning_rate": 7.242026128848918e-06,
"loss": 0.5567,
"step": 5863
},
{
"epoch": 0.37,
"grad_norm": 0.8726474642753601,
"learning_rate": 7.241109017327901e-06,
"loss": 0.5742,
"step": 5864
},
{
"epoch": 0.37,
"grad_norm": 0.8959450125694275,
"learning_rate": 7.240191811441577e-06,
"loss": 0.6154,
"step": 5865
},
{
"epoch": 0.37,
"grad_norm": 0.9082683324813843,
"learning_rate": 7.239274511228569e-06,
"loss": 0.6233,
"step": 5866
},
{
"epoch": 0.37,
"grad_norm": 0.8369854092597961,
"learning_rate": 7.238357116727502e-06,
"loss": 0.634,
"step": 5867
},
{
"epoch": 0.37,
"grad_norm": 0.9661149978637695,
"learning_rate": 7.2374396279770044e-06,
"loss": 0.5991,
"step": 5868
},
{
"epoch": 0.37,
"grad_norm": 0.8639382719993591,
"learning_rate": 7.236522045015706e-06,
"loss": 0.5692,
"step": 5869
},
{
"epoch": 0.37,
"grad_norm": 0.8862959742546082,
"learning_rate": 7.235604367882245e-06,
"loss": 0.6189,
"step": 5870
},
{
"epoch": 0.37,
"grad_norm": 0.8773701190948486,
"learning_rate": 7.234686596615262e-06,
"loss": 0.56,
"step": 5871
},
{
"epoch": 0.37,
"grad_norm": 0.8855640292167664,
"learning_rate": 7.2337687312534e-06,
"loss": 0.618,
"step": 5872
},
{
"epoch": 0.37,
"grad_norm": 0.8535584807395935,
"learning_rate": 7.232850771835307e-06,
"loss": 0.6135,
"step": 5873
},
{
"epoch": 0.37,
"grad_norm": 0.9095380306243896,
"learning_rate": 7.231932718399635e-06,
"loss": 0.6002,
"step": 5874
},
{
"epoch": 0.37,
"grad_norm": 0.9614174962043762,
"learning_rate": 7.231014570985042e-06,
"loss": 0.6079,
"step": 5875
},
{
"epoch": 0.37,
"grad_norm": 0.8840222358703613,
"learning_rate": 7.230096329630185e-06,
"loss": 0.5708,
"step": 5876
},
{
"epoch": 0.37,
"grad_norm": 0.8881139755249023,
"learning_rate": 7.22917799437373e-06,
"loss": 0.5693,
"step": 5877
},
{
"epoch": 0.37,
"grad_norm": 0.8951361775398254,
"learning_rate": 7.228259565254345e-06,
"loss": 0.6344,
"step": 5878
},
{
"epoch": 0.37,
"grad_norm": 0.9418209791183472,
"learning_rate": 7.227341042310702e-06,
"loss": 0.595,
"step": 5879
},
{
"epoch": 0.37,
"grad_norm": 0.964740514755249,
"learning_rate": 7.226422425581474e-06,
"loss": 0.6433,
"step": 5880
},
{
"epoch": 0.37,
"grad_norm": 0.8945766687393188,
"learning_rate": 7.225503715105344e-06,
"loss": 0.5805,
"step": 5881
},
{
"epoch": 0.37,
"grad_norm": 0.8209680914878845,
"learning_rate": 7.224584910920994e-06,
"loss": 0.6353,
"step": 5882
},
{
"epoch": 0.37,
"grad_norm": 0.9142740368843079,
"learning_rate": 7.223666013067113e-06,
"loss": 0.5583,
"step": 5883
},
{
"epoch": 0.37,
"grad_norm": 0.9378098845481873,
"learning_rate": 7.222747021582392e-06,
"loss": 0.5952,
"step": 5884
},
{
"epoch": 0.37,
"grad_norm": 0.9350360035896301,
"learning_rate": 7.221827936505524e-06,
"loss": 0.6235,
"step": 5885
},
{
"epoch": 0.37,
"grad_norm": 0.8425854444503784,
"learning_rate": 7.220908757875214e-06,
"loss": 0.5706,
"step": 5886
},
{
"epoch": 0.37,
"grad_norm": 0.8196877837181091,
"learning_rate": 7.21998948573016e-06,
"loss": 0.6116,
"step": 5887
},
{
"epoch": 0.37,
"grad_norm": 0.8354714512825012,
"learning_rate": 7.219070120109072e-06,
"loss": 0.545,
"step": 5888
},
{
"epoch": 0.37,
"grad_norm": 0.9335945248603821,
"learning_rate": 7.2181506610506605e-06,
"loss": 0.5873,
"step": 5889
},
{
"epoch": 0.37,
"grad_norm": 0.9078087210655212,
"learning_rate": 7.217231108593642e-06,
"loss": 0.6323,
"step": 5890
},
{
"epoch": 0.37,
"grad_norm": 0.8889597058296204,
"learning_rate": 7.2163114627767336e-06,
"loss": 0.5855,
"step": 5891
},
{
"epoch": 0.37,
"grad_norm": 0.9393039345741272,
"learning_rate": 7.21539172363866e-06,
"loss": 0.6515,
"step": 5892
},
{
"epoch": 0.37,
"grad_norm": 0.8929221034049988,
"learning_rate": 7.214471891218147e-06,
"loss": 0.5601,
"step": 5893
},
{
"epoch": 0.37,
"grad_norm": 0.8714567422866821,
"learning_rate": 7.213551965553927e-06,
"loss": 0.5709,
"step": 5894
},
{
"epoch": 0.37,
"grad_norm": 0.8751015067100525,
"learning_rate": 7.212631946684735e-06,
"loss": 0.5834,
"step": 5895
},
{
"epoch": 0.37,
"grad_norm": 0.8570420742034912,
"learning_rate": 7.211711834649308e-06,
"loss": 0.6357,
"step": 5896
},
{
"epoch": 0.37,
"grad_norm": 0.8587523102760315,
"learning_rate": 7.210791629486389e-06,
"loss": 0.6232,
"step": 5897
},
{
"epoch": 0.37,
"grad_norm": 0.9013690948486328,
"learning_rate": 7.209871331234727e-06,
"loss": 0.5748,
"step": 5898
},
{
"epoch": 0.37,
"grad_norm": 0.9406622052192688,
"learning_rate": 7.208950939933069e-06,
"loss": 0.6136,
"step": 5899
},
{
"epoch": 0.37,
"grad_norm": 0.8297491073608398,
"learning_rate": 7.208030455620172e-06,
"loss": 0.6091,
"step": 5900
},
{
"epoch": 0.37,
"grad_norm": 0.8118994235992432,
"learning_rate": 7.207109878334794e-06,
"loss": 0.5538,
"step": 5901
},
{
"epoch": 0.37,
"grad_norm": 0.8709977865219116,
"learning_rate": 7.206189208115697e-06,
"loss": 0.6218,
"step": 5902
},
{
"epoch": 0.37,
"grad_norm": 0.7942225337028503,
"learning_rate": 7.205268445001647e-06,
"loss": 0.5634,
"step": 5903
},
{
"epoch": 0.37,
"grad_norm": 0.9106520414352417,
"learning_rate": 7.204347589031413e-06,
"loss": 0.6096,
"step": 5904
},
{
"epoch": 0.37,
"grad_norm": 0.8729263544082642,
"learning_rate": 7.203426640243772e-06,
"loss": 0.5695,
"step": 5905
},
{
"epoch": 0.37,
"grad_norm": 0.8718299865722656,
"learning_rate": 7.2025055986775e-06,
"loss": 0.584,
"step": 5906
},
{
"epoch": 0.37,
"grad_norm": 0.877406895160675,
"learning_rate": 7.201584464371378e-06,
"loss": 0.6209,
"step": 5907
},
{
"epoch": 0.37,
"grad_norm": 0.8972481489181519,
"learning_rate": 7.200663237364195e-06,
"loss": 0.6161,
"step": 5908
},
{
"epoch": 0.37,
"grad_norm": 0.8868620991706848,
"learning_rate": 7.199741917694738e-06,
"loss": 0.6095,
"step": 5909
},
{
"epoch": 0.37,
"grad_norm": 0.9140734672546387,
"learning_rate": 7.198820505401801e-06,
"loss": 0.6437,
"step": 5910
},
{
"epoch": 0.37,
"grad_norm": 0.9390980005264282,
"learning_rate": 7.197899000524181e-06,
"loss": 0.6443,
"step": 5911
},
{
"epoch": 0.37,
"grad_norm": 0.8791154623031616,
"learning_rate": 7.196977403100681e-06,
"loss": 0.6049,
"step": 5912
},
{
"epoch": 0.37,
"grad_norm": 0.8596461415290833,
"learning_rate": 7.196055713170105e-06,
"loss": 0.577,
"step": 5913
},
{
"epoch": 0.37,
"grad_norm": 0.8921295404434204,
"learning_rate": 7.195133930771263e-06,
"loss": 0.6139,
"step": 5914
},
{
"epoch": 0.37,
"grad_norm": 0.8871878981590271,
"learning_rate": 7.194212055942966e-06,
"loss": 0.6127,
"step": 5915
},
{
"epoch": 0.37,
"grad_norm": 0.8868473172187805,
"learning_rate": 7.193290088724034e-06,
"loss": 0.5821,
"step": 5916
},
{
"epoch": 0.37,
"grad_norm": 0.8867928981781006,
"learning_rate": 7.192368029153285e-06,
"loss": 0.6544,
"step": 5917
},
{
"epoch": 0.37,
"grad_norm": 0.8952857851982117,
"learning_rate": 7.191445877269548e-06,
"loss": 0.5514,
"step": 5918
},
{
"epoch": 0.38,
"grad_norm": 0.9083967804908752,
"learning_rate": 7.190523633111644e-06,
"loss": 0.6256,
"step": 5919
},
{
"epoch": 0.38,
"grad_norm": 0.8887345194816589,
"learning_rate": 7.189601296718413e-06,
"loss": 0.6002,
"step": 5920
},
{
"epoch": 0.38,
"grad_norm": 0.8916110992431641,
"learning_rate": 7.188678868128687e-06,
"loss": 0.6277,
"step": 5921
},
{
"epoch": 0.38,
"grad_norm": 0.8981056809425354,
"learning_rate": 7.18775634738131e-06,
"loss": 0.6223,
"step": 5922
},
{
"epoch": 0.38,
"grad_norm": 0.9082187414169312,
"learning_rate": 7.18683373451512e-06,
"loss": 0.6221,
"step": 5923
},
{
"epoch": 0.38,
"grad_norm": 0.8695595860481262,
"learning_rate": 7.185911029568972e-06,
"loss": 0.607,
"step": 5924
},
{
"epoch": 0.38,
"grad_norm": 0.8874411582946777,
"learning_rate": 7.184988232581713e-06,
"loss": 0.6072,
"step": 5925
},
{
"epoch": 0.38,
"grad_norm": 0.8543808460235596,
"learning_rate": 7.184065343592203e-06,
"loss": 0.6432,
"step": 5926
},
{
"epoch": 0.38,
"grad_norm": 0.8796266317367554,
"learning_rate": 7.183142362639296e-06,
"loss": 0.6275,
"step": 5927
},
{
"epoch": 0.38,
"grad_norm": 0.8801624178886414,
"learning_rate": 7.18221928976186e-06,
"loss": 0.5814,
"step": 5928
},
{
"epoch": 0.38,
"grad_norm": 0.8554267287254333,
"learning_rate": 7.181296124998762e-06,
"loss": 0.6135,
"step": 5929
},
{
"epoch": 0.38,
"grad_norm": 0.9125354290008545,
"learning_rate": 7.180372868388873e-06,
"loss": 0.6057,
"step": 5930
},
{
"epoch": 0.38,
"grad_norm": 0.8697827458381653,
"learning_rate": 7.179449519971066e-06,
"loss": 0.5761,
"step": 5931
},
{
"epoch": 0.38,
"grad_norm": 0.8693752288818359,
"learning_rate": 7.178526079784221e-06,
"loss": 0.5969,
"step": 5932
},
{
"epoch": 0.38,
"grad_norm": 0.9136356711387634,
"learning_rate": 7.1776025478672225e-06,
"loss": 0.6007,
"step": 5933
},
{
"epoch": 0.38,
"grad_norm": 0.9643456339836121,
"learning_rate": 7.176678924258955e-06,
"loss": 0.6225,
"step": 5934
},
{
"epoch": 0.38,
"grad_norm": 0.8101844191551208,
"learning_rate": 7.175755208998311e-06,
"loss": 0.5552,
"step": 5935
},
{
"epoch": 0.38,
"grad_norm": 0.8744382262229919,
"learning_rate": 7.174831402124184e-06,
"loss": 0.6181,
"step": 5936
},
{
"epoch": 0.38,
"grad_norm": 0.9439733624458313,
"learning_rate": 7.173907503675472e-06,
"loss": 0.6245,
"step": 5937
},
{
"epoch": 0.38,
"grad_norm": 0.9993674159049988,
"learning_rate": 7.172983513691076e-06,
"loss": 0.633,
"step": 5938
},
{
"epoch": 0.38,
"grad_norm": 0.9159564971923828,
"learning_rate": 7.172059432209907e-06,
"loss": 0.5969,
"step": 5939
},
{
"epoch": 0.38,
"grad_norm": 0.9775694608688354,
"learning_rate": 7.171135259270868e-06,
"loss": 0.6291,
"step": 5940
},
{
"epoch": 0.38,
"grad_norm": 0.8840250968933105,
"learning_rate": 7.170210994912878e-06,
"loss": 0.5855,
"step": 5941
},
{
"epoch": 0.38,
"grad_norm": 0.8848263025283813,
"learning_rate": 7.169286639174852e-06,
"loss": 0.604,
"step": 5942
},
{
"epoch": 0.38,
"grad_norm": 0.943367063999176,
"learning_rate": 7.168362192095712e-06,
"loss": 0.6189,
"step": 5943
},
{
"epoch": 0.38,
"grad_norm": 1.0210529565811157,
"learning_rate": 7.1674376537143845e-06,
"loss": 0.6232,
"step": 5944
},
{
"epoch": 0.38,
"grad_norm": 0.9326754212379456,
"learning_rate": 7.166513024069797e-06,
"loss": 0.6188,
"step": 5945
},
{
"epoch": 0.38,
"grad_norm": 0.8790732622146606,
"learning_rate": 7.16558830320088e-06,
"loss": 0.628,
"step": 5946
},
{
"epoch": 0.38,
"grad_norm": 0.8562813401222229,
"learning_rate": 7.1646634911465765e-06,
"loss": 0.5557,
"step": 5947
},
{
"epoch": 0.38,
"grad_norm": 0.8628082871437073,
"learning_rate": 7.163738587945822e-06,
"loss": 0.5901,
"step": 5948
},
{
"epoch": 0.38,
"grad_norm": 0.9249915480613708,
"learning_rate": 7.162813593637563e-06,
"loss": 0.5991,
"step": 5949
},
{
"epoch": 0.38,
"grad_norm": 0.8744149208068848,
"learning_rate": 7.161888508260748e-06,
"loss": 0.6241,
"step": 5950
},
{
"epoch": 0.38,
"grad_norm": 0.8531312942504883,
"learning_rate": 7.160963331854327e-06,
"loss": 0.5488,
"step": 5951
},
{
"epoch": 0.38,
"grad_norm": 0.8790968060493469,
"learning_rate": 7.16003806445726e-06,
"loss": 0.5869,
"step": 5952
},
{
"epoch": 0.38,
"grad_norm": 0.8855732679367065,
"learning_rate": 7.159112706108502e-06,
"loss": 0.5524,
"step": 5953
},
{
"epoch": 0.38,
"grad_norm": 0.8487377166748047,
"learning_rate": 7.15818725684702e-06,
"loss": 0.6133,
"step": 5954
},
{
"epoch": 0.38,
"grad_norm": 0.9325571060180664,
"learning_rate": 7.15726171671178e-06,
"loss": 0.6002,
"step": 5955
},
{
"epoch": 0.38,
"grad_norm": 0.9158957600593567,
"learning_rate": 7.156336085741755e-06,
"loss": 0.6271,
"step": 5956
},
{
"epoch": 0.38,
"grad_norm": 0.8471969962120056,
"learning_rate": 7.155410363975916e-06,
"loss": 0.5651,
"step": 5957
},
{
"epoch": 0.38,
"grad_norm": 0.8656317591667175,
"learning_rate": 7.154484551453247e-06,
"loss": 0.6275,
"step": 5958
},
{
"epoch": 0.38,
"grad_norm": 0.8509047627449036,
"learning_rate": 7.1535586482127284e-06,
"loss": 0.6528,
"step": 5959
},
{
"epoch": 0.38,
"grad_norm": 0.8533027768135071,
"learning_rate": 7.152632654293347e-06,
"loss": 0.6032,
"step": 5960
},
{
"epoch": 0.38,
"grad_norm": 0.9203348755836487,
"learning_rate": 7.151706569734091e-06,
"loss": 0.5851,
"step": 5961
},
{
"epoch": 0.38,
"grad_norm": 0.7722728848457336,
"learning_rate": 7.150780394573957e-06,
"loss": 0.5788,
"step": 5962
},
{
"epoch": 0.38,
"grad_norm": 0.873199462890625,
"learning_rate": 7.149854128851945e-06,
"loss": 0.5754,
"step": 5963
},
{
"epoch": 0.38,
"grad_norm": 0.8286789059638977,
"learning_rate": 7.148927772607053e-06,
"loss": 0.5779,
"step": 5964
},
{
"epoch": 0.38,
"grad_norm": 0.8518579602241516,
"learning_rate": 7.148001325878287e-06,
"loss": 0.5629,
"step": 5965
},
{
"epoch": 0.38,
"grad_norm": 0.9076201319694519,
"learning_rate": 7.147074788704659e-06,
"loss": 0.608,
"step": 5966
},
{
"epoch": 0.38,
"grad_norm": 0.9196124076843262,
"learning_rate": 7.14614816112518e-06,
"loss": 0.5983,
"step": 5967
},
{
"epoch": 0.38,
"grad_norm": 0.8748944997787476,
"learning_rate": 7.145221443178868e-06,
"loss": 0.6094,
"step": 5968
},
{
"epoch": 0.38,
"grad_norm": 0.9023792147636414,
"learning_rate": 7.144294634904744e-06,
"loss": 0.6188,
"step": 5969
},
{
"epoch": 0.38,
"grad_norm": 0.9357802271842957,
"learning_rate": 7.143367736341832e-06,
"loss": 0.6842,
"step": 5970
},
{
"epoch": 0.38,
"grad_norm": 0.9043236970901489,
"learning_rate": 7.142440747529161e-06,
"loss": 0.6354,
"step": 5971
},
{
"epoch": 0.38,
"grad_norm": 0.9322927594184875,
"learning_rate": 7.141513668505764e-06,
"loss": 0.5922,
"step": 5972
},
{
"epoch": 0.38,
"grad_norm": 0.8984158635139465,
"learning_rate": 7.140586499310674e-06,
"loss": 0.5912,
"step": 5973
},
{
"epoch": 0.38,
"grad_norm": 0.8156484961509705,
"learning_rate": 7.139659239982935e-06,
"loss": 0.5413,
"step": 5974
},
{
"epoch": 0.38,
"grad_norm": 0.8405022621154785,
"learning_rate": 7.138731890561589e-06,
"loss": 0.586,
"step": 5975
},
{
"epoch": 0.38,
"grad_norm": 0.8600237965583801,
"learning_rate": 7.1378044510856814e-06,
"loss": 0.5976,
"step": 5976
},
{
"epoch": 0.38,
"grad_norm": 0.8850138783454895,
"learning_rate": 7.136876921594267e-06,
"loss": 0.6245,
"step": 5977
},
{
"epoch": 0.38,
"grad_norm": 0.9403291344642639,
"learning_rate": 7.1359493021263986e-06,
"loss": 0.6494,
"step": 5978
},
{
"epoch": 0.38,
"grad_norm": 0.8556556701660156,
"learning_rate": 7.135021592721134e-06,
"loss": 0.5771,
"step": 5979
},
{
"epoch": 0.38,
"grad_norm": 0.8727120757102966,
"learning_rate": 7.134093793417539e-06,
"loss": 0.6104,
"step": 5980
},
{
"epoch": 0.38,
"grad_norm": 0.8781840205192566,
"learning_rate": 7.133165904254677e-06,
"loss": 0.5915,
"step": 5981
},
{
"epoch": 0.38,
"grad_norm": 0.9176463484764099,
"learning_rate": 7.132237925271621e-06,
"loss": 0.5915,
"step": 5982
},
{
"epoch": 0.38,
"grad_norm": 0.8665004968643188,
"learning_rate": 7.131309856507444e-06,
"loss": 0.6643,
"step": 5983
},
{
"epoch": 0.38,
"grad_norm": 0.9312930107116699,
"learning_rate": 7.13038169800122e-06,
"loss": 0.6334,
"step": 5984
},
{
"epoch": 0.38,
"grad_norm": 0.9924306869506836,
"learning_rate": 7.129453449792036e-06,
"loss": 0.6339,
"step": 5985
},
{
"epoch": 0.38,
"grad_norm": 0.8924956917762756,
"learning_rate": 7.1285251119189754e-06,
"loss": 0.5738,
"step": 5986
},
{
"epoch": 0.38,
"grad_norm": 0.997128963470459,
"learning_rate": 7.127596684421127e-06,
"loss": 0.6045,
"step": 5987
},
{
"epoch": 0.38,
"grad_norm": 0.8882451057434082,
"learning_rate": 7.126668167337583e-06,
"loss": 0.589,
"step": 5988
},
{
"epoch": 0.38,
"grad_norm": 0.855974018573761,
"learning_rate": 7.12573956070744e-06,
"loss": 0.6437,
"step": 5989
},
{
"epoch": 0.38,
"grad_norm": 0.885186493396759,
"learning_rate": 7.1248108645698e-06,
"loss": 0.6057,
"step": 5990
},
{
"epoch": 0.38,
"grad_norm": 0.8319755792617798,
"learning_rate": 7.123882078963766e-06,
"loss": 0.5789,
"step": 5991
},
{
"epoch": 0.38,
"grad_norm": 0.8926076292991638,
"learning_rate": 7.1229532039284455e-06,
"loss": 0.637,
"step": 5992
},
{
"epoch": 0.38,
"grad_norm": 0.9193412661552429,
"learning_rate": 7.122024239502951e-06,
"loss": 0.5881,
"step": 5993
},
{
"epoch": 0.38,
"grad_norm": 0.9050919413566589,
"learning_rate": 7.121095185726399e-06,
"loss": 0.6494,
"step": 5994
},
{
"epoch": 0.38,
"grad_norm": 0.8967909812927246,
"learning_rate": 7.120166042637906e-06,
"loss": 0.6335,
"step": 5995
},
{
"epoch": 0.38,
"grad_norm": 0.8294476866722107,
"learning_rate": 7.119236810276598e-06,
"loss": 0.5503,
"step": 5996
},
{
"epoch": 0.38,
"grad_norm": 0.8650161027908325,
"learning_rate": 7.118307488681598e-06,
"loss": 0.6328,
"step": 5997
},
{
"epoch": 0.38,
"grad_norm": 0.8785965442657471,
"learning_rate": 7.11737807789204e-06,
"loss": 0.58,
"step": 5998
},
{
"epoch": 0.38,
"grad_norm": 0.9463037252426147,
"learning_rate": 7.116448577947057e-06,
"loss": 0.5731,
"step": 5999
},
{
"epoch": 0.38,
"grad_norm": 0.8291397094726562,
"learning_rate": 7.115518988885785e-06,
"loss": 0.5948,
"step": 6000
},
{
"epoch": 0.38,
"grad_norm": 0.9187091588973999,
"learning_rate": 7.114589310747371e-06,
"loss": 0.6384,
"step": 6001
},
{
"epoch": 0.38,
"grad_norm": 0.8593400716781616,
"learning_rate": 7.113659543570956e-06,
"loss": 0.625,
"step": 6002
},
{
"epoch": 0.38,
"grad_norm": 0.8747579455375671,
"learning_rate": 7.11272968739569e-06,
"loss": 0.5569,
"step": 6003
},
{
"epoch": 0.38,
"grad_norm": 0.8783309459686279,
"learning_rate": 7.1117997422607264e-06,
"loss": 0.5986,
"step": 6004
},
{
"epoch": 0.38,
"grad_norm": 0.8772686123847961,
"learning_rate": 7.110869708205224e-06,
"loss": 0.5752,
"step": 6005
},
{
"epoch": 0.38,
"grad_norm": 0.8766029477119446,
"learning_rate": 7.109939585268339e-06,
"loss": 0.6299,
"step": 6006
},
{
"epoch": 0.38,
"grad_norm": 0.8981195688247681,
"learning_rate": 7.109009373489239e-06,
"loss": 0.6076,
"step": 6007
},
{
"epoch": 0.38,
"grad_norm": 0.8908311128616333,
"learning_rate": 7.10807907290709e-06,
"loss": 0.651,
"step": 6008
},
{
"epoch": 0.38,
"grad_norm": 0.9420418739318848,
"learning_rate": 7.107148683561066e-06,
"loss": 0.5797,
"step": 6009
},
{
"epoch": 0.38,
"grad_norm": 0.9113646149635315,
"learning_rate": 7.106218205490342e-06,
"loss": 0.6277,
"step": 6010
},
{
"epoch": 0.38,
"grad_norm": 0.9516562223434448,
"learning_rate": 7.105287638734093e-06,
"loss": 0.6429,
"step": 6011
},
{
"epoch": 0.38,
"grad_norm": 0.9115347862243652,
"learning_rate": 7.104356983331509e-06,
"loss": 0.6106,
"step": 6012
},
{
"epoch": 0.38,
"grad_norm": 0.9286765456199646,
"learning_rate": 7.1034262393217705e-06,
"loss": 0.5787,
"step": 6013
},
{
"epoch": 0.38,
"grad_norm": 0.9642840027809143,
"learning_rate": 7.1024954067440725e-06,
"loss": 0.6729,
"step": 6014
},
{
"epoch": 0.38,
"grad_norm": 0.9277244806289673,
"learning_rate": 7.101564485637603e-06,
"loss": 0.601,
"step": 6015
},
{
"epoch": 0.38,
"grad_norm": 0.8856588006019592,
"learning_rate": 7.1006334760415674e-06,
"loss": 0.6001,
"step": 6016
},
{
"epoch": 0.38,
"grad_norm": 0.916569709777832,
"learning_rate": 7.0997023779951625e-06,
"loss": 0.6309,
"step": 6017
},
{
"epoch": 0.38,
"grad_norm": 0.9436630606651306,
"learning_rate": 7.098771191537596e-06,
"loss": 0.6716,
"step": 6018
},
{
"epoch": 0.38,
"grad_norm": 0.897139847278595,
"learning_rate": 7.097839916708073e-06,
"loss": 0.6179,
"step": 6019
},
{
"epoch": 0.38,
"grad_norm": 1.0071852207183838,
"learning_rate": 7.096908553545812e-06,
"loss": 0.627,
"step": 6020
},
{
"epoch": 0.38,
"grad_norm": 0.830710232257843,
"learning_rate": 7.095977102090025e-06,
"loss": 0.6087,
"step": 6021
},
{
"epoch": 0.38,
"grad_norm": 0.9118586182594299,
"learning_rate": 7.095045562379934e-06,
"loss": 0.5829,
"step": 6022
},
{
"epoch": 0.38,
"grad_norm": 0.8319807052612305,
"learning_rate": 7.0941139344547605e-06,
"loss": 0.5733,
"step": 6023
},
{
"epoch": 0.38,
"grad_norm": 0.8906463980674744,
"learning_rate": 7.093182218353737e-06,
"loss": 0.6338,
"step": 6024
},
{
"epoch": 0.38,
"grad_norm": 0.8869120478630066,
"learning_rate": 7.092250414116091e-06,
"loss": 0.5613,
"step": 6025
},
{
"epoch": 0.38,
"grad_norm": 0.8718534111976624,
"learning_rate": 7.091318521781058e-06,
"loss": 0.5957,
"step": 6026
},
{
"epoch": 0.38,
"grad_norm": 0.8886241912841797,
"learning_rate": 7.090386541387878e-06,
"loss": 0.6346,
"step": 6027
},
{
"epoch": 0.38,
"grad_norm": 0.8198200464248657,
"learning_rate": 7.089454472975792e-06,
"loss": 0.5945,
"step": 6028
},
{
"epoch": 0.38,
"grad_norm": 0.8481683135032654,
"learning_rate": 7.088522316584048e-06,
"loss": 0.5899,
"step": 6029
},
{
"epoch": 0.38,
"grad_norm": 0.8683075904846191,
"learning_rate": 7.087590072251893e-06,
"loss": 0.5804,
"step": 6030
},
{
"epoch": 0.38,
"grad_norm": 0.8363116383552551,
"learning_rate": 7.086657740018582e-06,
"loss": 0.6085,
"step": 6031
},
{
"epoch": 0.38,
"grad_norm": 0.8278794288635254,
"learning_rate": 7.085725319923373e-06,
"loss": 0.5597,
"step": 6032
},
{
"epoch": 0.38,
"grad_norm": 0.9533769488334656,
"learning_rate": 7.084792812005528e-06,
"loss": 0.6417,
"step": 6033
},
{
"epoch": 0.38,
"grad_norm": 0.9329741597175598,
"learning_rate": 7.083860216304309e-06,
"loss": 0.6205,
"step": 6034
},
{
"epoch": 0.38,
"grad_norm": 0.9326625466346741,
"learning_rate": 7.082927532858985e-06,
"loss": 0.5771,
"step": 6035
},
{
"epoch": 0.38,
"grad_norm": 0.9433557987213135,
"learning_rate": 7.0819947617088294e-06,
"loss": 0.5943,
"step": 6036
},
{
"epoch": 0.38,
"grad_norm": 0.9084176421165466,
"learning_rate": 7.081061902893117e-06,
"loss": 0.6308,
"step": 6037
},
{
"epoch": 0.38,
"grad_norm": 1.0079909563064575,
"learning_rate": 7.080128956451125e-06,
"loss": 0.5854,
"step": 6038
},
{
"epoch": 0.38,
"grad_norm": 0.9684156179428101,
"learning_rate": 7.079195922422143e-06,
"loss": 0.6058,
"step": 6039
},
{
"epoch": 0.38,
"grad_norm": 0.8209320902824402,
"learning_rate": 7.078262800845453e-06,
"loss": 0.5948,
"step": 6040
},
{
"epoch": 0.38,
"grad_norm": 0.8796716928482056,
"learning_rate": 7.0773295917603445e-06,
"loss": 0.5924,
"step": 6041
},
{
"epoch": 0.38,
"grad_norm": 0.8752491474151611,
"learning_rate": 7.076396295206113e-06,
"loss": 0.5695,
"step": 6042
},
{
"epoch": 0.38,
"grad_norm": 0.9148269891738892,
"learning_rate": 7.075462911222057e-06,
"loss": 0.5703,
"step": 6043
},
{
"epoch": 0.38,
"grad_norm": 0.8726043701171875,
"learning_rate": 7.07452943984748e-06,
"loss": 0.5915,
"step": 6044
},
{
"epoch": 0.38,
"grad_norm": 0.8048043847084045,
"learning_rate": 7.073595881121683e-06,
"loss": 0.5756,
"step": 6045
},
{
"epoch": 0.38,
"grad_norm": 0.9457216262817383,
"learning_rate": 7.072662235083977e-06,
"loss": 0.6594,
"step": 6046
},
{
"epoch": 0.38,
"grad_norm": 0.9144176840782166,
"learning_rate": 7.071728501773675e-06,
"loss": 0.6171,
"step": 6047
},
{
"epoch": 0.38,
"grad_norm": 0.9629214406013489,
"learning_rate": 7.070794681230093e-06,
"loss": 0.6228,
"step": 6048
},
{
"epoch": 0.38,
"grad_norm": 0.9110321402549744,
"learning_rate": 7.06986077349255e-06,
"loss": 0.6194,
"step": 6049
},
{
"epoch": 0.38,
"grad_norm": 0.8530512452125549,
"learning_rate": 7.068926778600372e-06,
"loss": 0.6007,
"step": 6050
},
{
"epoch": 0.38,
"grad_norm": 0.8579297661781311,
"learning_rate": 7.067992696592882e-06,
"loss": 0.6179,
"step": 6051
},
{
"epoch": 0.38,
"grad_norm": 0.9101974368095398,
"learning_rate": 7.067058527509416e-06,
"loss": 0.5758,
"step": 6052
},
{
"epoch": 0.38,
"grad_norm": 0.92631995677948,
"learning_rate": 7.066124271389305e-06,
"loss": 0.5397,
"step": 6053
},
{
"epoch": 0.38,
"grad_norm": 0.8497442603111267,
"learning_rate": 7.0651899282718896e-06,
"loss": 0.64,
"step": 6054
},
{
"epoch": 0.38,
"grad_norm": 0.9552360773086548,
"learning_rate": 7.064255498196509e-06,
"loss": 0.6108,
"step": 6055
},
{
"epoch": 0.38,
"grad_norm": 0.9272350668907166,
"learning_rate": 7.0633209812025116e-06,
"loss": 0.6146,
"step": 6056
},
{
"epoch": 0.38,
"grad_norm": 0.9053919315338135,
"learning_rate": 7.062386377329245e-06,
"loss": 0.5826,
"step": 6057
},
{
"epoch": 0.38,
"grad_norm": 0.9134330749511719,
"learning_rate": 7.061451686616062e-06,
"loss": 0.5864,
"step": 6058
},
{
"epoch": 0.38,
"grad_norm": 1.0126466751098633,
"learning_rate": 7.0605169091023205e-06,
"loss": 0.6486,
"step": 6059
},
{
"epoch": 0.38,
"grad_norm": 0.9160744547843933,
"learning_rate": 7.05958204482738e-06,
"loss": 0.5698,
"step": 6060
},
{
"epoch": 0.38,
"grad_norm": 0.924263060092926,
"learning_rate": 7.058647093830604e-06,
"loss": 0.6247,
"step": 6061
},
{
"epoch": 0.38,
"grad_norm": 0.8898268342018127,
"learning_rate": 7.0577120561513604e-06,
"loss": 0.6066,
"step": 6062
},
{
"epoch": 0.38,
"grad_norm": 0.887617826461792,
"learning_rate": 7.056776931829021e-06,
"loss": 0.5524,
"step": 6063
},
{
"epoch": 0.38,
"grad_norm": 0.899122416973114,
"learning_rate": 7.055841720902959e-06,
"loss": 0.5709,
"step": 6064
},
{
"epoch": 0.38,
"grad_norm": 0.9129178524017334,
"learning_rate": 7.054906423412554e-06,
"loss": 0.6503,
"step": 6065
},
{
"epoch": 0.38,
"grad_norm": 0.9189284443855286,
"learning_rate": 7.053971039397188e-06,
"loss": 0.645,
"step": 6066
},
{
"epoch": 0.38,
"grad_norm": 0.9444376230239868,
"learning_rate": 7.0530355688962484e-06,
"loss": 0.6305,
"step": 6067
},
{
"epoch": 0.38,
"grad_norm": 0.8827232122421265,
"learning_rate": 7.0521000119491215e-06,
"loss": 0.5677,
"step": 6068
},
{
"epoch": 0.38,
"grad_norm": 0.918749213218689,
"learning_rate": 7.0511643685952014e-06,
"loss": 0.6198,
"step": 6069
},
{
"epoch": 0.38,
"grad_norm": 0.8609430193901062,
"learning_rate": 7.050228638873886e-06,
"loss": 0.5669,
"step": 6070
},
{
"epoch": 0.38,
"grad_norm": 0.8873887658119202,
"learning_rate": 7.049292822824575e-06,
"loss": 0.6614,
"step": 6071
},
{
"epoch": 0.38,
"grad_norm": 0.8995460271835327,
"learning_rate": 7.048356920486672e-06,
"loss": 0.6257,
"step": 6072
},
{
"epoch": 0.38,
"grad_norm": 0.8707825541496277,
"learning_rate": 7.047420931899585e-06,
"loss": 0.5955,
"step": 6073
},
{
"epoch": 0.38,
"grad_norm": 0.8987425565719604,
"learning_rate": 7.0464848571027246e-06,
"loss": 0.612,
"step": 6074
},
{
"epoch": 0.38,
"grad_norm": 0.9181625247001648,
"learning_rate": 7.045548696135506e-06,
"loss": 0.5886,
"step": 6075
},
{
"epoch": 0.38,
"grad_norm": 0.9375488758087158,
"learning_rate": 7.044612449037348e-06,
"loss": 0.6653,
"step": 6076
},
{
"epoch": 0.39,
"grad_norm": 0.8560453057289124,
"learning_rate": 7.0436761158476715e-06,
"loss": 0.601,
"step": 6077
},
{
"epoch": 0.39,
"grad_norm": 0.8806048035621643,
"learning_rate": 7.042739696605905e-06,
"loss": 0.588,
"step": 6078
},
{
"epoch": 0.39,
"grad_norm": 0.8374508619308472,
"learning_rate": 7.041803191351475e-06,
"loss": 0.5957,
"step": 6079
},
{
"epoch": 0.39,
"grad_norm": 0.888600766658783,
"learning_rate": 7.040866600123816e-06,
"loss": 0.6332,
"step": 6080
},
{
"epoch": 0.39,
"grad_norm": 0.9433616399765015,
"learning_rate": 7.039929922962363e-06,
"loss": 0.6253,
"step": 6081
},
{
"epoch": 0.39,
"grad_norm": 0.920203447341919,
"learning_rate": 7.038993159906558e-06,
"loss": 0.6191,
"step": 6082
},
{
"epoch": 0.39,
"grad_norm": 0.8820478916168213,
"learning_rate": 7.0380563109958445e-06,
"loss": 0.5925,
"step": 6083
},
{
"epoch": 0.39,
"grad_norm": 0.8885151147842407,
"learning_rate": 7.03711937626967e-06,
"loss": 0.6106,
"step": 6084
},
{
"epoch": 0.39,
"grad_norm": 0.8442419767379761,
"learning_rate": 7.036182355767485e-06,
"loss": 0.5689,
"step": 6085
},
{
"epoch": 0.39,
"grad_norm": 0.8483255505561829,
"learning_rate": 7.0352452495287435e-06,
"loss": 0.594,
"step": 6086
},
{
"epoch": 0.39,
"grad_norm": 0.8738897442817688,
"learning_rate": 7.034308057592907e-06,
"loss": 0.5607,
"step": 6087
},
{
"epoch": 0.39,
"grad_norm": 0.8586130738258362,
"learning_rate": 7.033370779999431e-06,
"loss": 0.641,
"step": 6088
},
{
"epoch": 0.39,
"grad_norm": 0.8719096183776855,
"learning_rate": 7.032433416787788e-06,
"loss": 0.6118,
"step": 6089
},
{
"epoch": 0.39,
"grad_norm": 0.8584408164024353,
"learning_rate": 7.031495967997444e-06,
"loss": 0.5459,
"step": 6090
},
{
"epoch": 0.39,
"grad_norm": 0.8801223635673523,
"learning_rate": 7.0305584336678715e-06,
"loss": 0.5927,
"step": 6091
},
{
"epoch": 0.39,
"grad_norm": 0.7907819151878357,
"learning_rate": 7.029620813838544e-06,
"loss": 0.5669,
"step": 6092
},
{
"epoch": 0.39,
"grad_norm": 0.8615099191665649,
"learning_rate": 7.02868310854895e-06,
"loss": 0.5749,
"step": 6093
},
{
"epoch": 0.39,
"grad_norm": 0.8567502498626709,
"learning_rate": 7.027745317838564e-06,
"loss": 0.6005,
"step": 6094
},
{
"epoch": 0.39,
"grad_norm": 0.8757819533348083,
"learning_rate": 7.026807441746879e-06,
"loss": 0.6121,
"step": 6095
},
{
"epoch": 0.39,
"grad_norm": 0.8814988732337952,
"learning_rate": 7.025869480313381e-06,
"loss": 0.6107,
"step": 6096
},
{
"epoch": 0.39,
"grad_norm": 0.8909090757369995,
"learning_rate": 7.0249314335775675e-06,
"loss": 0.5844,
"step": 6097
},
{
"epoch": 0.39,
"grad_norm": 0.8157296776771545,
"learning_rate": 7.023993301578935e-06,
"loss": 0.5491,
"step": 6098
},
{
"epoch": 0.39,
"grad_norm": 0.8694610595703125,
"learning_rate": 7.023055084356987e-06,
"loss": 0.5522,
"step": 6099
},
{
"epoch": 0.39,
"grad_norm": 0.863211989402771,
"learning_rate": 7.022116781951226e-06,
"loss": 0.6082,
"step": 6100
},
{
"epoch": 0.39,
"grad_norm": 0.8649691939353943,
"learning_rate": 7.021178394401162e-06,
"loss": 0.6182,
"step": 6101
},
{
"epoch": 0.39,
"grad_norm": 0.8408727049827576,
"learning_rate": 7.020239921746308e-06,
"loss": 0.6067,
"step": 6102
},
{
"epoch": 0.39,
"grad_norm": 0.920093297958374,
"learning_rate": 7.019301364026178e-06,
"loss": 0.6312,
"step": 6103
},
{
"epoch": 0.39,
"grad_norm": 0.9169816374778748,
"learning_rate": 7.018362721280292e-06,
"loss": 0.5826,
"step": 6104
},
{
"epoch": 0.39,
"grad_norm": 0.9006035327911377,
"learning_rate": 7.0174239935481735e-06,
"loss": 0.6304,
"step": 6105
},
{
"epoch": 0.39,
"grad_norm": 0.8806290626525879,
"learning_rate": 7.016485180869349e-06,
"loss": 0.582,
"step": 6106
},
{
"epoch": 0.39,
"grad_norm": 0.9303503036499023,
"learning_rate": 7.015546283283346e-06,
"loss": 0.5657,
"step": 6107
},
{
"epoch": 0.39,
"grad_norm": 0.9579445123672485,
"learning_rate": 7.014607300829703e-06,
"loss": 0.6414,
"step": 6108
},
{
"epoch": 0.39,
"grad_norm": 0.8906927108764648,
"learning_rate": 7.013668233547955e-06,
"loss": 0.5922,
"step": 6109
},
{
"epoch": 0.39,
"grad_norm": 0.866254448890686,
"learning_rate": 7.0127290814776424e-06,
"loss": 0.5812,
"step": 6110
},
{
"epoch": 0.39,
"grad_norm": 0.9502847194671631,
"learning_rate": 7.0117898446583084e-06,
"loss": 0.6458,
"step": 6111
},
{
"epoch": 0.39,
"grad_norm": 0.8791959285736084,
"learning_rate": 7.010850523129504e-06,
"loss": 0.5782,
"step": 6112
},
{
"epoch": 0.39,
"grad_norm": 0.8798953294754028,
"learning_rate": 7.009911116930779e-06,
"loss": 0.6134,
"step": 6113
},
{
"epoch": 0.39,
"grad_norm": 0.9204188585281372,
"learning_rate": 7.00897162610169e-06,
"loss": 0.6289,
"step": 6114
},
{
"epoch": 0.39,
"grad_norm": 0.9081289172172546,
"learning_rate": 7.0080320506817926e-06,
"loss": 0.5684,
"step": 6115
},
{
"epoch": 0.39,
"grad_norm": 0.8632351160049438,
"learning_rate": 7.007092390710652e-06,
"loss": 0.5876,
"step": 6116
},
{
"epoch": 0.39,
"grad_norm": 0.8665913939476013,
"learning_rate": 7.006152646227833e-06,
"loss": 0.5125,
"step": 6117
},
{
"epoch": 0.39,
"grad_norm": 0.8278458118438721,
"learning_rate": 7.005212817272905e-06,
"loss": 0.6409,
"step": 6118
},
{
"epoch": 0.39,
"grad_norm": 0.9356765151023865,
"learning_rate": 7.0042729038854405e-06,
"loss": 0.6375,
"step": 6119
},
{
"epoch": 0.39,
"grad_norm": 0.8514903783798218,
"learning_rate": 7.003332906105016e-06,
"loss": 0.6365,
"step": 6120
},
{
"epoch": 0.39,
"grad_norm": 0.9448802471160889,
"learning_rate": 7.002392823971214e-06,
"loss": 0.6518,
"step": 6121
},
{
"epoch": 0.39,
"grad_norm": 0.8504220247268677,
"learning_rate": 7.001452657523614e-06,
"loss": 0.503,
"step": 6122
},
{
"epoch": 0.39,
"grad_norm": 0.9173238277435303,
"learning_rate": 7.000512406801805e-06,
"loss": 0.6124,
"step": 6123
},
{
"epoch": 0.39,
"grad_norm": 0.8627074956893921,
"learning_rate": 6.9995720718453786e-06,
"loss": 0.5751,
"step": 6124
},
{
"epoch": 0.39,
"grad_norm": 0.997188925743103,
"learning_rate": 6.998631652693928e-06,
"loss": 0.6455,
"step": 6125
},
{
"epoch": 0.39,
"grad_norm": 0.8653777837753296,
"learning_rate": 6.997691149387052e-06,
"loss": 0.5966,
"step": 6126
},
{
"epoch": 0.39,
"grad_norm": 0.8478190302848816,
"learning_rate": 6.99675056196435e-06,
"loss": 0.5918,
"step": 6127
},
{
"epoch": 0.39,
"grad_norm": 0.8888818025588989,
"learning_rate": 6.995809890465428e-06,
"loss": 0.5978,
"step": 6128
},
{
"epoch": 0.39,
"grad_norm": 0.8966024518013,
"learning_rate": 6.994869134929895e-06,
"loss": 0.6194,
"step": 6129
},
{
"epoch": 0.39,
"grad_norm": 0.8759685158729553,
"learning_rate": 6.993928295397363e-06,
"loss": 0.6068,
"step": 6130
},
{
"epoch": 0.39,
"grad_norm": 0.8871753215789795,
"learning_rate": 6.992987371907446e-06,
"loss": 0.6187,
"step": 6131
},
{
"epoch": 0.39,
"grad_norm": 0.8602596521377563,
"learning_rate": 6.992046364499764e-06,
"loss": 0.5815,
"step": 6132
},
{
"epoch": 0.39,
"grad_norm": 0.8757937550544739,
"learning_rate": 6.991105273213939e-06,
"loss": 0.5496,
"step": 6133
},
{
"epoch": 0.39,
"grad_norm": 0.8693877458572388,
"learning_rate": 6.990164098089598e-06,
"loss": 0.6058,
"step": 6134
},
{
"epoch": 0.39,
"grad_norm": 0.8464959263801575,
"learning_rate": 6.9892228391663694e-06,
"loss": 0.5767,
"step": 6135
},
{
"epoch": 0.39,
"grad_norm": 0.8602965474128723,
"learning_rate": 6.988281496483888e-06,
"loss": 0.6125,
"step": 6136
},
{
"epoch": 0.39,
"grad_norm": 0.9073672294616699,
"learning_rate": 6.987340070081789e-06,
"loss": 0.6005,
"step": 6137
},
{
"epoch": 0.39,
"grad_norm": 0.9364018440246582,
"learning_rate": 6.986398559999714e-06,
"loss": 0.5963,
"step": 6138
},
{
"epoch": 0.39,
"grad_norm": 0.875133216381073,
"learning_rate": 6.9854569662773044e-06,
"loss": 0.5463,
"step": 6139
},
{
"epoch": 0.39,
"grad_norm": 0.934817373752594,
"learning_rate": 6.984515288954211e-06,
"loss": 0.6034,
"step": 6140
},
{
"epoch": 0.39,
"grad_norm": 0.9065064191818237,
"learning_rate": 6.98357352807008e-06,
"loss": 0.5662,
"step": 6141
},
{
"epoch": 0.39,
"grad_norm": 0.813168466091156,
"learning_rate": 6.982631683664569e-06,
"loss": 0.5632,
"step": 6142
},
{
"epoch": 0.39,
"grad_norm": 0.8873375654220581,
"learning_rate": 6.981689755777335e-06,
"loss": 0.5367,
"step": 6143
},
{
"epoch": 0.39,
"grad_norm": 0.8773168325424194,
"learning_rate": 6.98074774444804e-06,
"loss": 0.5601,
"step": 6144
},
{
"epoch": 0.39,
"grad_norm": 0.8021374344825745,
"learning_rate": 6.979805649716347e-06,
"loss": 0.5076,
"step": 6145
},
{
"epoch": 0.39,
"grad_norm": 0.8933539986610413,
"learning_rate": 6.978863471621925e-06,
"loss": 0.6894,
"step": 6146
},
{
"epoch": 0.39,
"grad_norm": 0.8887168169021606,
"learning_rate": 6.977921210204446e-06,
"loss": 0.647,
"step": 6147
},
{
"epoch": 0.39,
"grad_norm": 0.8803666234016418,
"learning_rate": 6.9769788655035875e-06,
"loss": 0.5892,
"step": 6148
},
{
"epoch": 0.39,
"grad_norm": 0.9113365411758423,
"learning_rate": 6.976036437559024e-06,
"loss": 0.6732,
"step": 6149
},
{
"epoch": 0.39,
"grad_norm": 0.8204461932182312,
"learning_rate": 6.975093926410441e-06,
"loss": 0.5916,
"step": 6150
},
{
"epoch": 0.39,
"grad_norm": 0.934197187423706,
"learning_rate": 6.974151332097525e-06,
"loss": 0.6305,
"step": 6151
},
{
"epoch": 0.39,
"grad_norm": 0.9386470913887024,
"learning_rate": 6.973208654659962e-06,
"loss": 0.6485,
"step": 6152
},
{
"epoch": 0.39,
"grad_norm": 0.9400019645690918,
"learning_rate": 6.9722658941374475e-06,
"loss": 0.5726,
"step": 6153
},
{
"epoch": 0.39,
"grad_norm": 0.8022521734237671,
"learning_rate": 6.971323050569677e-06,
"loss": 0.593,
"step": 6154
},
{
"epoch": 0.39,
"grad_norm": 0.8721299171447754,
"learning_rate": 6.970380123996352e-06,
"loss": 0.5738,
"step": 6155
},
{
"epoch": 0.39,
"grad_norm": 0.9494243264198303,
"learning_rate": 6.969437114457174e-06,
"loss": 0.6282,
"step": 6156
},
{
"epoch": 0.39,
"grad_norm": 0.8277761936187744,
"learning_rate": 6.968494021991848e-06,
"loss": 0.5913,
"step": 6157
},
{
"epoch": 0.39,
"grad_norm": 0.854987621307373,
"learning_rate": 6.967550846640089e-06,
"loss": 0.5491,
"step": 6158
},
{
"epoch": 0.39,
"grad_norm": 0.9130845665931702,
"learning_rate": 6.966607588441609e-06,
"loss": 0.6274,
"step": 6159
},
{
"epoch": 0.39,
"grad_norm": 0.8112385869026184,
"learning_rate": 6.9656642474361225e-06,
"loss": 0.5309,
"step": 6160
},
{
"epoch": 0.39,
"grad_norm": 0.8674074411392212,
"learning_rate": 6.964720823663353e-06,
"loss": 0.6072,
"step": 6161
},
{
"epoch": 0.39,
"grad_norm": 0.9010210633277893,
"learning_rate": 6.963777317163025e-06,
"loss": 0.604,
"step": 6162
},
{
"epoch": 0.39,
"grad_norm": 0.8281999230384827,
"learning_rate": 6.962833727974867e-06,
"loss": 0.5805,
"step": 6163
},
{
"epoch": 0.39,
"grad_norm": 0.879539966583252,
"learning_rate": 6.961890056138607e-06,
"loss": 0.5993,
"step": 6164
},
{
"epoch": 0.39,
"grad_norm": 0.9275795221328735,
"learning_rate": 6.9609463016939816e-06,
"loss": 0.6101,
"step": 6165
},
{
"epoch": 0.39,
"grad_norm": 0.8362293839454651,
"learning_rate": 6.960002464680731e-06,
"loss": 0.5565,
"step": 6166
},
{
"epoch": 0.39,
"grad_norm": 0.8443682193756104,
"learning_rate": 6.959058545138593e-06,
"loss": 0.5736,
"step": 6167
},
{
"epoch": 0.39,
"grad_norm": 0.9468548893928528,
"learning_rate": 6.958114543107315e-06,
"loss": 0.6321,
"step": 6168
},
{
"epoch": 0.39,
"grad_norm": 0.8098998069763184,
"learning_rate": 6.957170458626645e-06,
"loss": 0.552,
"step": 6169
},
{
"epoch": 0.39,
"grad_norm": 0.9221862554550171,
"learning_rate": 6.956226291736338e-06,
"loss": 0.6174,
"step": 6170
},
{
"epoch": 0.39,
"grad_norm": 0.8823233246803284,
"learning_rate": 6.955282042476144e-06,
"loss": 0.5788,
"step": 6171
},
{
"epoch": 0.39,
"grad_norm": 0.8700152039527893,
"learning_rate": 6.9543377108858265e-06,
"loss": 0.6143,
"step": 6172
},
{
"epoch": 0.39,
"grad_norm": 0.866326093673706,
"learning_rate": 6.9533932970051465e-06,
"loss": 0.586,
"step": 6173
},
{
"epoch": 0.39,
"grad_norm": 0.9445212483406067,
"learning_rate": 6.952448800873871e-06,
"loss": 0.6754,
"step": 6174
},
{
"epoch": 0.39,
"grad_norm": 0.9050667881965637,
"learning_rate": 6.951504222531768e-06,
"loss": 0.6266,
"step": 6175
},
{
"epoch": 0.39,
"grad_norm": 0.8842514157295227,
"learning_rate": 6.950559562018611e-06,
"loss": 0.6103,
"step": 6176
},
{
"epoch": 0.39,
"grad_norm": 0.8354772329330444,
"learning_rate": 6.949614819374175e-06,
"loss": 0.5891,
"step": 6177
},
{
"epoch": 0.39,
"grad_norm": 0.8761371970176697,
"learning_rate": 6.948669994638243e-06,
"loss": 0.6099,
"step": 6178
},
{
"epoch": 0.39,
"grad_norm": 0.827156126499176,
"learning_rate": 6.947725087850595e-06,
"loss": 0.5347,
"step": 6179
},
{
"epoch": 0.39,
"grad_norm": 0.8923287987709045,
"learning_rate": 6.94678009905102e-06,
"loss": 0.5873,
"step": 6180
},
{
"epoch": 0.39,
"grad_norm": 0.865619421005249,
"learning_rate": 6.945835028279308e-06,
"loss": 0.6504,
"step": 6181
},
{
"epoch": 0.39,
"grad_norm": 0.8588405251502991,
"learning_rate": 6.944889875575251e-06,
"loss": 0.5939,
"step": 6182
},
{
"epoch": 0.39,
"grad_norm": 0.8965503573417664,
"learning_rate": 6.943944640978648e-06,
"loss": 0.6188,
"step": 6183
},
{
"epoch": 0.39,
"grad_norm": 0.8754391670227051,
"learning_rate": 6.942999324529297e-06,
"loss": 0.5729,
"step": 6184
},
{
"epoch": 0.39,
"grad_norm": 0.873710036277771,
"learning_rate": 6.942053926267005e-06,
"loss": 0.5963,
"step": 6185
},
{
"epoch": 0.39,
"grad_norm": 0.8937984704971313,
"learning_rate": 6.941108446231578e-06,
"loss": 0.5968,
"step": 6186
},
{
"epoch": 0.39,
"grad_norm": 0.8646506071090698,
"learning_rate": 6.940162884462828e-06,
"loss": 0.5911,
"step": 6187
},
{
"epoch": 0.39,
"grad_norm": 0.8940115571022034,
"learning_rate": 6.9392172410005656e-06,
"loss": 0.6188,
"step": 6188
},
{
"epoch": 0.39,
"grad_norm": 0.8401895761489868,
"learning_rate": 6.9382715158846135e-06,
"loss": 0.5936,
"step": 6189
},
{
"epoch": 0.39,
"grad_norm": 0.8863813281059265,
"learning_rate": 6.93732570915479e-06,
"loss": 0.5897,
"step": 6190
},
{
"epoch": 0.39,
"grad_norm": 0.9222760796546936,
"learning_rate": 6.93637982085092e-06,
"loss": 0.6047,
"step": 6191
},
{
"epoch": 0.39,
"grad_norm": 0.8968461751937866,
"learning_rate": 6.9354338510128315e-06,
"loss": 0.5943,
"step": 6192
},
{
"epoch": 0.39,
"grad_norm": 0.9590244293212891,
"learning_rate": 6.934487799680357e-06,
"loss": 0.6274,
"step": 6193
},
{
"epoch": 0.39,
"grad_norm": 0.8756579756736755,
"learning_rate": 6.933541666893331e-06,
"loss": 0.6139,
"step": 6194
},
{
"epoch": 0.39,
"grad_norm": 0.921607494354248,
"learning_rate": 6.932595452691592e-06,
"loss": 0.64,
"step": 6195
},
{
"epoch": 0.39,
"grad_norm": 0.8667705059051514,
"learning_rate": 6.9316491571149815e-06,
"loss": 0.6098,
"step": 6196
},
{
"epoch": 0.39,
"grad_norm": 0.8910043835639954,
"learning_rate": 6.930702780203344e-06,
"loss": 0.6432,
"step": 6197
},
{
"epoch": 0.39,
"grad_norm": 0.9581403732299805,
"learning_rate": 6.929756321996529e-06,
"loss": 0.6453,
"step": 6198
},
{
"epoch": 0.39,
"grad_norm": 0.8930731415748596,
"learning_rate": 6.928809782534388e-06,
"loss": 0.6059,
"step": 6199
},
{
"epoch": 0.39,
"grad_norm": 0.9078335762023926,
"learning_rate": 6.927863161856778e-06,
"loss": 0.5956,
"step": 6200
},
{
"epoch": 0.39,
"grad_norm": 0.8804222345352173,
"learning_rate": 6.9269164600035555e-06,
"loss": 0.5862,
"step": 6201
},
{
"epoch": 0.39,
"grad_norm": 0.8888744711875916,
"learning_rate": 6.925969677014585e-06,
"loss": 0.6367,
"step": 6202
},
{
"epoch": 0.39,
"grad_norm": 0.9415931105613708,
"learning_rate": 6.92502281292973e-06,
"loss": 0.5966,
"step": 6203
},
{
"epoch": 0.39,
"grad_norm": 0.8707212209701538,
"learning_rate": 6.924075867788863e-06,
"loss": 0.6106,
"step": 6204
},
{
"epoch": 0.39,
"grad_norm": 0.866563081741333,
"learning_rate": 6.923128841631854e-06,
"loss": 0.5493,
"step": 6205
},
{
"epoch": 0.39,
"grad_norm": 0.9359866976737976,
"learning_rate": 6.92218173449858e-06,
"loss": 0.5749,
"step": 6206
},
{
"epoch": 0.39,
"grad_norm": 0.9220528602600098,
"learning_rate": 6.921234546428918e-06,
"loss": 0.5909,
"step": 6207
},
{
"epoch": 0.39,
"grad_norm": 0.8486345410346985,
"learning_rate": 6.920287277462755e-06,
"loss": 0.5765,
"step": 6208
},
{
"epoch": 0.39,
"grad_norm": 0.8374233245849609,
"learning_rate": 6.9193399276399745e-06,
"loss": 0.5556,
"step": 6209
},
{
"epoch": 0.39,
"grad_norm": 0.8650535345077515,
"learning_rate": 6.918392497000466e-06,
"loss": 0.6162,
"step": 6210
},
{
"epoch": 0.39,
"grad_norm": 0.8010015487670898,
"learning_rate": 6.917444985584122e-06,
"loss": 0.5534,
"step": 6211
},
{
"epoch": 0.39,
"grad_norm": 0.888006865978241,
"learning_rate": 6.916497393430841e-06,
"loss": 0.6161,
"step": 6212
},
{
"epoch": 0.39,
"grad_norm": 0.8319229483604431,
"learning_rate": 6.915549720580523e-06,
"loss": 0.5842,
"step": 6213
},
{
"epoch": 0.39,
"grad_norm": 0.8947864174842834,
"learning_rate": 6.914601967073068e-06,
"loss": 0.5607,
"step": 6214
},
{
"epoch": 0.39,
"grad_norm": 0.9286026358604431,
"learning_rate": 6.913654132948385e-06,
"loss": 0.6001,
"step": 6215
},
{
"epoch": 0.39,
"grad_norm": 0.8386892676353455,
"learning_rate": 6.912706218246384e-06,
"loss": 0.5296,
"step": 6216
},
{
"epoch": 0.39,
"grad_norm": 0.8397946357727051,
"learning_rate": 6.911758223006979e-06,
"loss": 0.5952,
"step": 6217
},
{
"epoch": 0.39,
"grad_norm": 0.8822040557861328,
"learning_rate": 6.910810147270084e-06,
"loss": 0.5506,
"step": 6218
},
{
"epoch": 0.39,
"grad_norm": 0.9485325217247009,
"learning_rate": 6.909861991075622e-06,
"loss": 0.6302,
"step": 6219
},
{
"epoch": 0.39,
"grad_norm": 0.9046191573143005,
"learning_rate": 6.908913754463514e-06,
"loss": 0.6251,
"step": 6220
},
{
"epoch": 0.39,
"grad_norm": 0.9548308849334717,
"learning_rate": 6.90796543747369e-06,
"loss": 0.6542,
"step": 6221
},
{
"epoch": 0.39,
"grad_norm": 0.9304654002189636,
"learning_rate": 6.907017040146078e-06,
"loss": 0.6334,
"step": 6222
},
{
"epoch": 0.39,
"grad_norm": 0.9015122652053833,
"learning_rate": 6.906068562520613e-06,
"loss": 0.6062,
"step": 6223
},
{
"epoch": 0.39,
"grad_norm": 0.8413129448890686,
"learning_rate": 6.905120004637232e-06,
"loss": 0.5425,
"step": 6224
},
{
"epoch": 0.39,
"grad_norm": 0.9578669667243958,
"learning_rate": 6.904171366535873e-06,
"loss": 0.607,
"step": 6225
},
{
"epoch": 0.39,
"grad_norm": 0.895363450050354,
"learning_rate": 6.9032226482564835e-06,
"loss": 0.5703,
"step": 6226
},
{
"epoch": 0.39,
"grad_norm": 0.9190669059753418,
"learning_rate": 6.9022738498390084e-06,
"loss": 0.6413,
"step": 6227
},
{
"epoch": 0.39,
"grad_norm": 0.8880024552345276,
"learning_rate": 6.9013249713234e-06,
"loss": 0.6153,
"step": 6228
},
{
"epoch": 0.39,
"grad_norm": 0.8834933042526245,
"learning_rate": 6.900376012749611e-06,
"loss": 0.5887,
"step": 6229
},
{
"epoch": 0.39,
"grad_norm": 0.9798893928527832,
"learning_rate": 6.899426974157598e-06,
"loss": 0.6217,
"step": 6230
},
{
"epoch": 0.39,
"grad_norm": 0.8374887704849243,
"learning_rate": 6.898477855587323e-06,
"loss": 0.6106,
"step": 6231
},
{
"epoch": 0.39,
"grad_norm": 0.8667147159576416,
"learning_rate": 6.897528657078752e-06,
"loss": 0.5879,
"step": 6232
},
{
"epoch": 0.39,
"grad_norm": 0.928011417388916,
"learning_rate": 6.8965793786718484e-06,
"loss": 0.591,
"step": 6233
},
{
"epoch": 0.39,
"grad_norm": 0.8557186126708984,
"learning_rate": 6.895630020406584e-06,
"loss": 0.5891,
"step": 6234
},
{
"epoch": 0.4,
"grad_norm": 0.9000698328018188,
"learning_rate": 6.894680582322934e-06,
"loss": 0.6082,
"step": 6235
},
{
"epoch": 0.4,
"grad_norm": 0.8863718509674072,
"learning_rate": 6.893731064460878e-06,
"loss": 0.6171,
"step": 6236
},
{
"epoch": 0.4,
"grad_norm": 0.9076705574989319,
"learning_rate": 6.892781466860393e-06,
"loss": 0.5794,
"step": 6237
},
{
"epoch": 0.4,
"grad_norm": 0.8823980689048767,
"learning_rate": 6.891831789561465e-06,
"loss": 0.6175,
"step": 6238
},
{
"epoch": 0.4,
"grad_norm": 0.9114968776702881,
"learning_rate": 6.8908820326040815e-06,
"loss": 0.6038,
"step": 6239
},
{
"epoch": 0.4,
"grad_norm": 0.8561393618583679,
"learning_rate": 6.889932196028235e-06,
"loss": 0.6196,
"step": 6240
},
{
"epoch": 0.4,
"grad_norm": 0.9283210635185242,
"learning_rate": 6.888982279873917e-06,
"loss": 0.582,
"step": 6241
},
{
"epoch": 0.4,
"grad_norm": 0.8675887584686279,
"learning_rate": 6.888032284181127e-06,
"loss": 0.583,
"step": 6242
},
{
"epoch": 0.4,
"grad_norm": 0.9557647109031677,
"learning_rate": 6.887082208989865e-06,
"loss": 0.6167,
"step": 6243
},
{
"epoch": 0.4,
"grad_norm": 0.9393128156661987,
"learning_rate": 6.886132054340136e-06,
"loss": 0.6255,
"step": 6244
},
{
"epoch": 0.4,
"grad_norm": 0.8403303027153015,
"learning_rate": 6.885181820271947e-06,
"loss": 0.6011,
"step": 6245
},
{
"epoch": 0.4,
"grad_norm": 0.8862718343734741,
"learning_rate": 6.88423150682531e-06,
"loss": 0.6226,
"step": 6246
},
{
"epoch": 0.4,
"grad_norm": 0.9034367799758911,
"learning_rate": 6.88328111404024e-06,
"loss": 0.5662,
"step": 6247
},
{
"epoch": 0.4,
"grad_norm": 0.8718511462211609,
"learning_rate": 6.882330641956752e-06,
"loss": 0.6259,
"step": 6248
},
{
"epoch": 0.4,
"grad_norm": 0.834060549736023,
"learning_rate": 6.881380090614871e-06,
"loss": 0.5645,
"step": 6249
},
{
"epoch": 0.4,
"grad_norm": 0.9293310046195984,
"learning_rate": 6.8804294600546175e-06,
"loss": 0.6016,
"step": 6250
},
{
"epoch": 0.4,
"grad_norm": 0.9908111095428467,
"learning_rate": 6.879478750316022e-06,
"loss": 0.6271,
"step": 6251
},
{
"epoch": 0.4,
"grad_norm": 0.9571794867515564,
"learning_rate": 6.878527961439113e-06,
"loss": 0.6243,
"step": 6252
},
{
"epoch": 0.4,
"grad_norm": 0.9029168486595154,
"learning_rate": 6.877577093463927e-06,
"loss": 0.6002,
"step": 6253
},
{
"epoch": 0.4,
"grad_norm": 0.9042819738388062,
"learning_rate": 6.876626146430502e-06,
"loss": 0.5916,
"step": 6254
},
{
"epoch": 0.4,
"grad_norm": 0.9775123000144958,
"learning_rate": 6.875675120378878e-06,
"loss": 0.6199,
"step": 6255
},
{
"epoch": 0.4,
"grad_norm": 0.909796416759491,
"learning_rate": 6.8747240153491e-06,
"loss": 0.5858,
"step": 6256
},
{
"epoch": 0.4,
"grad_norm": 0.847358763217926,
"learning_rate": 6.873772831381214e-06,
"loss": 0.6043,
"step": 6257
},
{
"epoch": 0.4,
"grad_norm": 0.9297115206718445,
"learning_rate": 6.872821568515275e-06,
"loss": 0.6586,
"step": 6258
},
{
"epoch": 0.4,
"grad_norm": 0.8991652727127075,
"learning_rate": 6.8718702267913325e-06,
"loss": 0.6056,
"step": 6259
},
{
"epoch": 0.4,
"grad_norm": 0.8950271010398865,
"learning_rate": 6.870918806249449e-06,
"loss": 0.6192,
"step": 6260
},
{
"epoch": 0.4,
"grad_norm": 0.8827762007713318,
"learning_rate": 6.8699673069296806e-06,
"loss": 0.588,
"step": 6261
},
{
"epoch": 0.4,
"grad_norm": 0.8640381693840027,
"learning_rate": 6.869015728872095e-06,
"loss": 0.6255,
"step": 6262
},
{
"epoch": 0.4,
"grad_norm": 0.8890305757522583,
"learning_rate": 6.868064072116758e-06,
"loss": 0.6502,
"step": 6263
},
{
"epoch": 0.4,
"grad_norm": 0.854560911655426,
"learning_rate": 6.867112336703743e-06,
"loss": 0.5748,
"step": 6264
},
{
"epoch": 0.4,
"grad_norm": 0.872962236404419,
"learning_rate": 6.866160522673121e-06,
"loss": 0.6329,
"step": 6265
},
{
"epoch": 0.4,
"grad_norm": 0.8564478754997253,
"learning_rate": 6.865208630064973e-06,
"loss": 0.6265,
"step": 6266
},
{
"epoch": 0.4,
"grad_norm": 0.8863121271133423,
"learning_rate": 6.864256658919377e-06,
"loss": 0.5473,
"step": 6267
},
{
"epoch": 0.4,
"grad_norm": 0.8578380942344666,
"learning_rate": 6.8633046092764174e-06,
"loss": 0.6347,
"step": 6268
},
{
"epoch": 0.4,
"grad_norm": 0.8845486640930176,
"learning_rate": 6.862352481176184e-06,
"loss": 0.6456,
"step": 6269
},
{
"epoch": 0.4,
"grad_norm": 0.9655935168266296,
"learning_rate": 6.861400274658767e-06,
"loss": 0.5902,
"step": 6270
},
{
"epoch": 0.4,
"grad_norm": 0.8958570957183838,
"learning_rate": 6.860447989764259e-06,
"loss": 0.5804,
"step": 6271
},
{
"epoch": 0.4,
"grad_norm": 0.8562657237052917,
"learning_rate": 6.8594956265327585e-06,
"loss": 0.574,
"step": 6272
},
{
"epoch": 0.4,
"grad_norm": 0.9815998077392578,
"learning_rate": 6.858543185004365e-06,
"loss": 0.6155,
"step": 6273
},
{
"epoch": 0.4,
"grad_norm": 0.8810309171676636,
"learning_rate": 6.857590665219185e-06,
"loss": 0.6283,
"step": 6274
},
{
"epoch": 0.4,
"grad_norm": 0.8395465016365051,
"learning_rate": 6.856638067217324e-06,
"loss": 0.5414,
"step": 6275
},
{
"epoch": 0.4,
"grad_norm": 0.9288424253463745,
"learning_rate": 6.85568539103889e-06,
"loss": 0.5853,
"step": 6276
},
{
"epoch": 0.4,
"grad_norm": 0.9081584215164185,
"learning_rate": 6.854732636724002e-06,
"loss": 0.6545,
"step": 6277
},
{
"epoch": 0.4,
"grad_norm": 0.8159523606300354,
"learning_rate": 6.853779804312775e-06,
"loss": 0.5649,
"step": 6278
},
{
"epoch": 0.4,
"grad_norm": 0.92462158203125,
"learning_rate": 6.8528268938453295e-06,
"loss": 0.5591,
"step": 6279
},
{
"epoch": 0.4,
"grad_norm": 0.9456450939178467,
"learning_rate": 6.851873905361786e-06,
"loss": 0.6015,
"step": 6280
},
{
"epoch": 0.4,
"grad_norm": 0.9764153957366943,
"learning_rate": 6.850920838902278e-06,
"loss": 0.6429,
"step": 6281
},
{
"epoch": 0.4,
"grad_norm": 0.893409252166748,
"learning_rate": 6.84996769450693e-06,
"loss": 0.5466,
"step": 6282
},
{
"epoch": 0.4,
"grad_norm": 0.9273908734321594,
"learning_rate": 6.84901447221588e-06,
"loss": 0.655,
"step": 6283
},
{
"epoch": 0.4,
"grad_norm": 0.8750333189964294,
"learning_rate": 6.84806117206926e-06,
"loss": 0.6498,
"step": 6284
},
{
"epoch": 0.4,
"grad_norm": 0.8754233121871948,
"learning_rate": 6.847107794107216e-06,
"loss": 0.5554,
"step": 6285
},
{
"epoch": 0.4,
"grad_norm": 0.93915194272995,
"learning_rate": 6.846154338369887e-06,
"loss": 0.6434,
"step": 6286
},
{
"epoch": 0.4,
"grad_norm": 0.835665225982666,
"learning_rate": 6.845200804897421e-06,
"loss": 0.6035,
"step": 6287
},
{
"epoch": 0.4,
"grad_norm": 0.8906847834587097,
"learning_rate": 6.844247193729968e-06,
"loss": 0.636,
"step": 6288
},
{
"epoch": 0.4,
"grad_norm": 0.8233811855316162,
"learning_rate": 6.843293504907682e-06,
"loss": 0.5461,
"step": 6289
},
{
"epoch": 0.4,
"grad_norm": 0.9119184613227844,
"learning_rate": 6.84233973847072e-06,
"loss": 0.5923,
"step": 6290
},
{
"epoch": 0.4,
"grad_norm": 0.9312586784362793,
"learning_rate": 6.8413858944592385e-06,
"loss": 0.66,
"step": 6291
},
{
"epoch": 0.4,
"grad_norm": 0.8756263256072998,
"learning_rate": 6.840431972913404e-06,
"loss": 0.6262,
"step": 6292
},
{
"epoch": 0.4,
"grad_norm": 0.8882813453674316,
"learning_rate": 6.83947797387338e-06,
"loss": 0.5798,
"step": 6293
},
{
"epoch": 0.4,
"grad_norm": 0.8455925583839417,
"learning_rate": 6.838523897379339e-06,
"loss": 0.618,
"step": 6294
},
{
"epoch": 0.4,
"grad_norm": 0.8319289684295654,
"learning_rate": 6.837569743471451e-06,
"loss": 0.6029,
"step": 6295
},
{
"epoch": 0.4,
"grad_norm": 0.8721569180488586,
"learning_rate": 6.836615512189895e-06,
"loss": 0.5526,
"step": 6296
},
{
"epoch": 0.4,
"grad_norm": 0.8549659848213196,
"learning_rate": 6.835661203574848e-06,
"loss": 0.5947,
"step": 6297
},
{
"epoch": 0.4,
"grad_norm": 0.8474895358085632,
"learning_rate": 6.834706817666495e-06,
"loss": 0.6118,
"step": 6298
},
{
"epoch": 0.4,
"grad_norm": 0.8855010271072388,
"learning_rate": 6.833752354505019e-06,
"loss": 0.5868,
"step": 6299
},
{
"epoch": 0.4,
"grad_norm": 0.8940566182136536,
"learning_rate": 6.832797814130611e-06,
"loss": 0.5843,
"step": 6300
},
{
"epoch": 0.4,
"grad_norm": 0.842008650302887,
"learning_rate": 6.831843196583462e-06,
"loss": 0.6042,
"step": 6301
},
{
"epoch": 0.4,
"grad_norm": 0.8390910029411316,
"learning_rate": 6.8308885019037695e-06,
"loss": 0.5937,
"step": 6302
},
{
"epoch": 0.4,
"grad_norm": 0.8749220967292786,
"learning_rate": 6.82993373013173e-06,
"loss": 0.6125,
"step": 6303
},
{
"epoch": 0.4,
"grad_norm": 0.9013246297836304,
"learning_rate": 6.8289788813075485e-06,
"loss": 0.5911,
"step": 6304
},
{
"epoch": 0.4,
"grad_norm": 0.9145839810371399,
"learning_rate": 6.82802395547143e-06,
"loss": 0.5748,
"step": 6305
},
{
"epoch": 0.4,
"grad_norm": 0.8356090188026428,
"learning_rate": 6.82706895266358e-06,
"loss": 0.4983,
"step": 6306
},
{
"epoch": 0.4,
"grad_norm": 0.8637154698371887,
"learning_rate": 6.826113872924213e-06,
"loss": 0.6237,
"step": 6307
},
{
"epoch": 0.4,
"grad_norm": 0.8588926792144775,
"learning_rate": 6.825158716293543e-06,
"loss": 0.6215,
"step": 6308
},
{
"epoch": 0.4,
"grad_norm": 0.8768167495727539,
"learning_rate": 6.824203482811788e-06,
"loss": 0.5862,
"step": 6309
},
{
"epoch": 0.4,
"grad_norm": 0.8740860819816589,
"learning_rate": 6.823248172519173e-06,
"loss": 0.5699,
"step": 6310
},
{
"epoch": 0.4,
"grad_norm": 0.937689483165741,
"learning_rate": 6.8222927854559175e-06,
"loss": 0.6146,
"step": 6311
},
{
"epoch": 0.4,
"grad_norm": 0.8367653489112854,
"learning_rate": 6.8213373216622514e-06,
"loss": 0.5808,
"step": 6312
},
{
"epoch": 0.4,
"grad_norm": 0.9312880635261536,
"learning_rate": 6.820381781178409e-06,
"loss": 0.6059,
"step": 6313
},
{
"epoch": 0.4,
"grad_norm": 0.9240770936012268,
"learning_rate": 6.819426164044622e-06,
"loss": 0.6084,
"step": 6314
},
{
"epoch": 0.4,
"grad_norm": 0.8993687629699707,
"learning_rate": 6.818470470301128e-06,
"loss": 0.5742,
"step": 6315
},
{
"epoch": 0.4,
"grad_norm": 0.8884747624397278,
"learning_rate": 6.817514699988168e-06,
"loss": 0.5959,
"step": 6316
},
{
"epoch": 0.4,
"grad_norm": 0.919092059135437,
"learning_rate": 6.8165588531459885e-06,
"loss": 0.612,
"step": 6317
},
{
"epoch": 0.4,
"grad_norm": 0.8630106449127197,
"learning_rate": 6.815602929814833e-06,
"loss": 0.5945,
"step": 6318
},
{
"epoch": 0.4,
"grad_norm": 0.8956739902496338,
"learning_rate": 6.814646930034954e-06,
"loss": 0.6494,
"step": 6319
},
{
"epoch": 0.4,
"grad_norm": 0.8530880808830261,
"learning_rate": 6.813690853846606e-06,
"loss": 0.5881,
"step": 6320
},
{
"epoch": 0.4,
"grad_norm": 0.9456024765968323,
"learning_rate": 6.8127347012900465e-06,
"loss": 0.592,
"step": 6321
},
{
"epoch": 0.4,
"grad_norm": 0.959709882736206,
"learning_rate": 6.811778472405534e-06,
"loss": 0.6175,
"step": 6322
},
{
"epoch": 0.4,
"grad_norm": 0.8467543721199036,
"learning_rate": 6.810822167233333e-06,
"loss": 0.5823,
"step": 6323
},
{
"epoch": 0.4,
"grad_norm": 0.9109113216400146,
"learning_rate": 6.80986578581371e-06,
"loss": 0.618,
"step": 6324
},
{
"epoch": 0.4,
"grad_norm": 0.9041874408721924,
"learning_rate": 6.808909328186934e-06,
"loss": 0.6321,
"step": 6325
},
{
"epoch": 0.4,
"grad_norm": 0.81452476978302,
"learning_rate": 6.80795279439328e-06,
"loss": 0.5553,
"step": 6326
},
{
"epoch": 0.4,
"grad_norm": 0.8997363448143005,
"learning_rate": 6.806996184473023e-06,
"loss": 0.623,
"step": 6327
},
{
"epoch": 0.4,
"grad_norm": 0.9020070433616638,
"learning_rate": 6.806039498466444e-06,
"loss": 0.5917,
"step": 6328
},
{
"epoch": 0.4,
"grad_norm": 0.8951176404953003,
"learning_rate": 6.805082736413822e-06,
"loss": 0.6224,
"step": 6329
},
{
"epoch": 0.4,
"grad_norm": 0.9783088564872742,
"learning_rate": 6.804125898355447e-06,
"loss": 0.5973,
"step": 6330
},
{
"epoch": 0.4,
"grad_norm": 0.8250484466552734,
"learning_rate": 6.8031689843316054e-06,
"loss": 0.5555,
"step": 6331
},
{
"epoch": 0.4,
"grad_norm": 0.8294229507446289,
"learning_rate": 6.802211994382591e-06,
"loss": 0.5801,
"step": 6332
},
{
"epoch": 0.4,
"grad_norm": 0.8849250078201294,
"learning_rate": 6.8012549285487e-06,
"loss": 0.6152,
"step": 6333
},
{
"epoch": 0.4,
"grad_norm": 0.8871194124221802,
"learning_rate": 6.800297786870228e-06,
"loss": 0.5972,
"step": 6334
},
{
"epoch": 0.4,
"grad_norm": 0.8779382705688477,
"learning_rate": 6.799340569387481e-06,
"loss": 0.548,
"step": 6335
},
{
"epoch": 0.4,
"grad_norm": 0.8883922100067139,
"learning_rate": 6.798383276140761e-06,
"loss": 0.611,
"step": 6336
},
{
"epoch": 0.4,
"grad_norm": 0.9105244874954224,
"learning_rate": 6.797425907170378e-06,
"loss": 0.5728,
"step": 6337
},
{
"epoch": 0.4,
"grad_norm": 0.8537696003913879,
"learning_rate": 6.796468462516642e-06,
"loss": 0.5878,
"step": 6338
},
{
"epoch": 0.4,
"grad_norm": 0.9340306520462036,
"learning_rate": 6.79551094221987e-06,
"loss": 0.6079,
"step": 6339
},
{
"epoch": 0.4,
"grad_norm": 0.8353374600410461,
"learning_rate": 6.794553346320376e-06,
"loss": 0.5569,
"step": 6340
},
{
"epoch": 0.4,
"grad_norm": 0.9598260521888733,
"learning_rate": 6.7935956748584855e-06,
"loss": 0.5305,
"step": 6341
},
{
"epoch": 0.4,
"grad_norm": 0.8836723566055298,
"learning_rate": 6.792637927874519e-06,
"loss": 0.6038,
"step": 6342
},
{
"epoch": 0.4,
"grad_norm": 0.930091142654419,
"learning_rate": 6.791680105408807e-06,
"loss": 0.6583,
"step": 6343
},
{
"epoch": 0.4,
"grad_norm": 0.9237890839576721,
"learning_rate": 6.790722207501678e-06,
"loss": 0.6554,
"step": 6344
},
{
"epoch": 0.4,
"grad_norm": 0.8894320726394653,
"learning_rate": 6.789764234193465e-06,
"loss": 0.5665,
"step": 6345
},
{
"epoch": 0.4,
"grad_norm": 0.9483606815338135,
"learning_rate": 6.788806185524508e-06,
"loss": 0.6341,
"step": 6346
},
{
"epoch": 0.4,
"grad_norm": 0.948627769947052,
"learning_rate": 6.787848061535145e-06,
"loss": 0.6711,
"step": 6347
},
{
"epoch": 0.4,
"grad_norm": 0.8971147537231445,
"learning_rate": 6.786889862265719e-06,
"loss": 0.5643,
"step": 6348
},
{
"epoch": 0.4,
"grad_norm": 0.8906237483024597,
"learning_rate": 6.7859315877565775e-06,
"loss": 0.5608,
"step": 6349
},
{
"epoch": 0.4,
"grad_norm": 0.9154103398323059,
"learning_rate": 6.784973238048069e-06,
"loss": 0.6419,
"step": 6350
},
{
"epoch": 0.4,
"grad_norm": 0.8681836128234863,
"learning_rate": 6.7840148131805485e-06,
"loss": 0.6173,
"step": 6351
},
{
"epoch": 0.4,
"grad_norm": 0.8321382403373718,
"learning_rate": 6.783056313194369e-06,
"loss": 0.5718,
"step": 6352
},
{
"epoch": 0.4,
"grad_norm": 0.8255459666252136,
"learning_rate": 6.7820977381298915e-06,
"loss": 0.6267,
"step": 6353
},
{
"epoch": 0.4,
"grad_norm": 0.8768226504325867,
"learning_rate": 6.781139088027477e-06,
"loss": 0.6143,
"step": 6354
},
{
"epoch": 0.4,
"grad_norm": 0.9021497368812561,
"learning_rate": 6.780180362927492e-06,
"loss": 0.5718,
"step": 6355
},
{
"epoch": 0.4,
"grad_norm": 0.8666380643844604,
"learning_rate": 6.779221562870306e-06,
"loss": 0.5783,
"step": 6356
},
{
"epoch": 0.4,
"grad_norm": 0.9858885407447815,
"learning_rate": 6.778262687896287e-06,
"loss": 0.6102,
"step": 6357
},
{
"epoch": 0.4,
"grad_norm": 0.973626434803009,
"learning_rate": 6.777303738045814e-06,
"loss": 0.6679,
"step": 6358
},
{
"epoch": 0.4,
"grad_norm": 0.8242490291595459,
"learning_rate": 6.776344713359263e-06,
"loss": 0.593,
"step": 6359
},
{
"epoch": 0.4,
"grad_norm": 0.8745444416999817,
"learning_rate": 6.775385613877016e-06,
"loss": 0.6231,
"step": 6360
},
{
"epoch": 0.4,
"grad_norm": 0.8920515775680542,
"learning_rate": 6.774426439639455e-06,
"loss": 0.5821,
"step": 6361
},
{
"epoch": 0.4,
"grad_norm": 0.9180237650871277,
"learning_rate": 6.773467190686972e-06,
"loss": 0.625,
"step": 6362
},
{
"epoch": 0.4,
"grad_norm": 0.9676087498664856,
"learning_rate": 6.772507867059953e-06,
"loss": 0.6289,
"step": 6363
},
{
"epoch": 0.4,
"grad_norm": 0.9148452281951904,
"learning_rate": 6.771548468798796e-06,
"loss": 0.5209,
"step": 6364
},
{
"epoch": 0.4,
"grad_norm": 0.9174354076385498,
"learning_rate": 6.770588995943893e-06,
"loss": 0.5529,
"step": 6365
},
{
"epoch": 0.4,
"grad_norm": 0.9325718879699707,
"learning_rate": 6.769629448535648e-06,
"loss": 0.6186,
"step": 6366
},
{
"epoch": 0.4,
"grad_norm": 0.9010034799575806,
"learning_rate": 6.768669826614464e-06,
"loss": 0.5967,
"step": 6367
},
{
"epoch": 0.4,
"grad_norm": 0.8638269901275635,
"learning_rate": 6.767710130220745e-06,
"loss": 0.6489,
"step": 6368
},
{
"epoch": 0.4,
"grad_norm": 0.8227560520172119,
"learning_rate": 6.766750359394904e-06,
"loss": 0.5682,
"step": 6369
},
{
"epoch": 0.4,
"grad_norm": 0.9004592895507812,
"learning_rate": 6.76579051417735e-06,
"loss": 0.6632,
"step": 6370
},
{
"epoch": 0.4,
"grad_norm": 0.8845899701118469,
"learning_rate": 6.7648305946085e-06,
"loss": 0.5959,
"step": 6371
},
{
"epoch": 0.4,
"grad_norm": 0.9487060904502869,
"learning_rate": 6.763870600728772e-06,
"loss": 0.6677,
"step": 6372
},
{
"epoch": 0.4,
"grad_norm": 0.8828071355819702,
"learning_rate": 6.76291053257859e-06,
"loss": 0.5542,
"step": 6373
},
{
"epoch": 0.4,
"grad_norm": 0.9216554164886475,
"learning_rate": 6.761950390198378e-06,
"loss": 0.6145,
"step": 6374
},
{
"epoch": 0.4,
"grad_norm": 0.8994758725166321,
"learning_rate": 6.760990173628566e-06,
"loss": 0.5999,
"step": 6375
},
{
"epoch": 0.4,
"grad_norm": 0.8886323571205139,
"learning_rate": 6.760029882909582e-06,
"loss": 0.5941,
"step": 6376
},
{
"epoch": 0.4,
"grad_norm": 0.8494300842285156,
"learning_rate": 6.759069518081863e-06,
"loss": 0.5829,
"step": 6377
},
{
"epoch": 0.4,
"grad_norm": 0.9388317465782166,
"learning_rate": 6.758109079185846e-06,
"loss": 0.593,
"step": 6378
},
{
"epoch": 0.4,
"grad_norm": 0.9075881838798523,
"learning_rate": 6.757148566261973e-06,
"loss": 0.5656,
"step": 6379
},
{
"epoch": 0.4,
"grad_norm": 0.9015637040138245,
"learning_rate": 6.756187979350684e-06,
"loss": 0.6046,
"step": 6380
},
{
"epoch": 0.4,
"grad_norm": 0.9175539016723633,
"learning_rate": 6.75522731849243e-06,
"loss": 0.5636,
"step": 6381
},
{
"epoch": 0.4,
"grad_norm": 0.8536416292190552,
"learning_rate": 6.754266583727659e-06,
"loss": 0.6258,
"step": 6382
},
{
"epoch": 0.4,
"grad_norm": 0.84648197889328,
"learning_rate": 6.753305775096826e-06,
"loss": 0.5732,
"step": 6383
},
{
"epoch": 0.4,
"grad_norm": 0.8069581389427185,
"learning_rate": 6.752344892640384e-06,
"loss": 0.5564,
"step": 6384
},
{
"epoch": 0.4,
"grad_norm": 0.8196657299995422,
"learning_rate": 6.751383936398796e-06,
"loss": 0.5909,
"step": 6385
},
{
"epoch": 0.4,
"grad_norm": 0.9850438237190247,
"learning_rate": 6.750422906412523e-06,
"loss": 0.6585,
"step": 6386
},
{
"epoch": 0.4,
"grad_norm": 0.9552303552627563,
"learning_rate": 6.749461802722032e-06,
"loss": 0.6039,
"step": 6387
},
{
"epoch": 0.4,
"grad_norm": 0.955740213394165,
"learning_rate": 6.7485006253677875e-06,
"loss": 0.6084,
"step": 6388
},
{
"epoch": 0.4,
"grad_norm": 1.0010960102081299,
"learning_rate": 6.747539374390266e-06,
"loss": 0.6799,
"step": 6389
},
{
"epoch": 0.4,
"grad_norm": 0.909136950969696,
"learning_rate": 6.746578049829942e-06,
"loss": 0.5671,
"step": 6390
},
{
"epoch": 0.4,
"grad_norm": 0.8283319473266602,
"learning_rate": 6.745616651727289e-06,
"loss": 0.5801,
"step": 6391
},
{
"epoch": 0.4,
"grad_norm": 0.8886178135871887,
"learning_rate": 6.744655180122793e-06,
"loss": 0.5932,
"step": 6392
},
{
"epoch": 0.41,
"grad_norm": 0.9179041385650635,
"learning_rate": 6.743693635056936e-06,
"loss": 0.6413,
"step": 6393
},
{
"epoch": 0.41,
"grad_norm": 0.9177907109260559,
"learning_rate": 6.742732016570207e-06,
"loss": 0.6361,
"step": 6394
},
{
"epoch": 0.41,
"grad_norm": 0.8754076361656189,
"learning_rate": 6.741770324703095e-06,
"loss": 0.6194,
"step": 6395
},
{
"epoch": 0.41,
"grad_norm": 0.8487926721572876,
"learning_rate": 6.740808559496093e-06,
"loss": 0.5599,
"step": 6396
},
{
"epoch": 0.41,
"grad_norm": 0.9245063066482544,
"learning_rate": 6.739846720989699e-06,
"loss": 0.6556,
"step": 6397
},
{
"epoch": 0.41,
"grad_norm": 0.9024572968482971,
"learning_rate": 6.738884809224413e-06,
"loss": 0.5621,
"step": 6398
},
{
"epoch": 0.41,
"grad_norm": 0.9168578386306763,
"learning_rate": 6.7379228242407345e-06,
"loss": 0.6098,
"step": 6399
},
{
"epoch": 0.41,
"grad_norm": 0.8839691281318665,
"learning_rate": 6.736960766079173e-06,
"loss": 0.5978,
"step": 6400
},
{
"epoch": 0.41,
"grad_norm": 0.9675304293632507,
"learning_rate": 6.735998634780238e-06,
"loss": 0.5849,
"step": 6401
},
{
"epoch": 0.41,
"grad_norm": 0.8997515439987183,
"learning_rate": 6.735036430384436e-06,
"loss": 0.5645,
"step": 6402
},
{
"epoch": 0.41,
"grad_norm": 0.8792773485183716,
"learning_rate": 6.7340741529322875e-06,
"loss": 0.6105,
"step": 6403
},
{
"epoch": 0.41,
"grad_norm": 0.9032172560691833,
"learning_rate": 6.733111802464308e-06,
"loss": 0.5789,
"step": 6404
},
{
"epoch": 0.41,
"grad_norm": 0.8126611113548279,
"learning_rate": 6.732149379021022e-06,
"loss": 0.5711,
"step": 6405
},
{
"epoch": 0.41,
"grad_norm": 0.8911159038543701,
"learning_rate": 6.7311868826429485e-06,
"loss": 0.6068,
"step": 6406
},
{
"epoch": 0.41,
"grad_norm": 0.9121822714805603,
"learning_rate": 6.730224313370619e-06,
"loss": 0.6165,
"step": 6407
},
{
"epoch": 0.41,
"grad_norm": 0.8678528070449829,
"learning_rate": 6.729261671244563e-06,
"loss": 0.5745,
"step": 6408
},
{
"epoch": 0.41,
"grad_norm": 0.9104927182197571,
"learning_rate": 6.728298956305313e-06,
"loss": 0.5595,
"step": 6409
},
{
"epoch": 0.41,
"grad_norm": 0.9237872958183289,
"learning_rate": 6.727336168593406e-06,
"loss": 0.582,
"step": 6410
},
{
"epoch": 0.41,
"grad_norm": 0.9053632020950317,
"learning_rate": 6.726373308149382e-06,
"loss": 0.5984,
"step": 6411
},
{
"epoch": 0.41,
"grad_norm": 0.85235995054245,
"learning_rate": 6.725410375013783e-06,
"loss": 0.5581,
"step": 6412
},
{
"epoch": 0.41,
"grad_norm": 0.8615298271179199,
"learning_rate": 6.724447369227159e-06,
"loss": 0.5921,
"step": 6413
},
{
"epoch": 0.41,
"grad_norm": 0.9467587471008301,
"learning_rate": 6.723484290830051e-06,
"loss": 0.5917,
"step": 6414
},
{
"epoch": 0.41,
"grad_norm": 0.9265984892845154,
"learning_rate": 6.722521139863017e-06,
"loss": 0.6216,
"step": 6415
},
{
"epoch": 0.41,
"grad_norm": 0.8947895169258118,
"learning_rate": 6.72155791636661e-06,
"loss": 0.6052,
"step": 6416
},
{
"epoch": 0.41,
"grad_norm": 0.8797786235809326,
"learning_rate": 6.720594620381387e-06,
"loss": 0.5621,
"step": 6417
},
{
"epoch": 0.41,
"grad_norm": 0.9423597455024719,
"learning_rate": 6.71963125194791e-06,
"loss": 0.6233,
"step": 6418
},
{
"epoch": 0.41,
"grad_norm": 0.9435870051383972,
"learning_rate": 6.718667811106744e-06,
"loss": 0.5961,
"step": 6419
},
{
"epoch": 0.41,
"grad_norm": 0.9278707504272461,
"learning_rate": 6.717704297898455e-06,
"loss": 0.5821,
"step": 6420
},
{
"epoch": 0.41,
"grad_norm": 0.8902246952056885,
"learning_rate": 6.716740712363614e-06,
"loss": 0.5672,
"step": 6421
},
{
"epoch": 0.41,
"grad_norm": 0.9437769651412964,
"learning_rate": 6.715777054542793e-06,
"loss": 0.6031,
"step": 6422
},
{
"epoch": 0.41,
"grad_norm": 0.9507419466972351,
"learning_rate": 6.714813324476569e-06,
"loss": 0.5812,
"step": 6423
},
{
"epoch": 0.41,
"grad_norm": 0.9317444562911987,
"learning_rate": 6.713849522205522e-06,
"loss": 0.5611,
"step": 6424
},
{
"epoch": 0.41,
"grad_norm": 0.8754682540893555,
"learning_rate": 6.712885647770233e-06,
"loss": 0.6031,
"step": 6425
},
{
"epoch": 0.41,
"grad_norm": 0.9129989743232727,
"learning_rate": 6.711921701211288e-06,
"loss": 0.5967,
"step": 6426
},
{
"epoch": 0.41,
"grad_norm": 0.9079276919364929,
"learning_rate": 6.710957682569276e-06,
"loss": 0.6603,
"step": 6427
},
{
"epoch": 0.41,
"grad_norm": 0.8813990950584412,
"learning_rate": 6.709993591884788e-06,
"loss": 0.5873,
"step": 6428
},
{
"epoch": 0.41,
"grad_norm": 0.8813159465789795,
"learning_rate": 6.709029429198418e-06,
"loss": 0.5746,
"step": 6429
},
{
"epoch": 0.41,
"grad_norm": 0.9071645140647888,
"learning_rate": 6.7080651945507645e-06,
"loss": 0.5743,
"step": 6430
},
{
"epoch": 0.41,
"grad_norm": 0.8338029384613037,
"learning_rate": 6.707100887982427e-06,
"loss": 0.5769,
"step": 6431
},
{
"epoch": 0.41,
"grad_norm": 0.8543631434440613,
"learning_rate": 6.7061365095340105e-06,
"loss": 0.592,
"step": 6432
},
{
"epoch": 0.41,
"grad_norm": 0.9253416061401367,
"learning_rate": 6.70517205924612e-06,
"loss": 0.6099,
"step": 6433
},
{
"epoch": 0.41,
"grad_norm": 0.846316933631897,
"learning_rate": 6.7042075371593665e-06,
"loss": 0.5734,
"step": 6434
},
{
"epoch": 0.41,
"grad_norm": 0.9376114010810852,
"learning_rate": 6.703242943314362e-06,
"loss": 0.6333,
"step": 6435
},
{
"epoch": 0.41,
"grad_norm": 0.970414400100708,
"learning_rate": 6.702278277751722e-06,
"loss": 0.5598,
"step": 6436
},
{
"epoch": 0.41,
"grad_norm": 0.957120418548584,
"learning_rate": 6.701313540512065e-06,
"loss": 0.6345,
"step": 6437
},
{
"epoch": 0.41,
"grad_norm": 0.932551920413971,
"learning_rate": 6.700348731636014e-06,
"loss": 0.5905,
"step": 6438
},
{
"epoch": 0.41,
"grad_norm": 0.9044030904769897,
"learning_rate": 6.699383851164194e-06,
"loss": 0.6525,
"step": 6439
},
{
"epoch": 0.41,
"grad_norm": 0.8771166205406189,
"learning_rate": 6.6984188991372305e-06,
"loss": 0.599,
"step": 6440
},
{
"epoch": 0.41,
"grad_norm": 0.8178818821907043,
"learning_rate": 6.697453875595755e-06,
"loss": 0.5609,
"step": 6441
},
{
"epoch": 0.41,
"grad_norm": 0.8368890881538391,
"learning_rate": 6.696488780580403e-06,
"loss": 0.543,
"step": 6442
},
{
"epoch": 0.41,
"grad_norm": 0.8307216763496399,
"learning_rate": 6.69552361413181e-06,
"loss": 0.5784,
"step": 6443
},
{
"epoch": 0.41,
"grad_norm": 0.8592568039894104,
"learning_rate": 6.694558376290615e-06,
"loss": 0.603,
"step": 6444
},
{
"epoch": 0.41,
"grad_norm": 0.8686701655387878,
"learning_rate": 6.693593067097462e-06,
"loss": 0.6026,
"step": 6445
},
{
"epoch": 0.41,
"grad_norm": 0.9390038251876831,
"learning_rate": 6.692627686592998e-06,
"loss": 0.6531,
"step": 6446
},
{
"epoch": 0.41,
"grad_norm": 0.9398483633995056,
"learning_rate": 6.691662234817869e-06,
"loss": 0.6016,
"step": 6447
},
{
"epoch": 0.41,
"grad_norm": 0.8840192556381226,
"learning_rate": 6.690696711812729e-06,
"loss": 0.5461,
"step": 6448
},
{
"epoch": 0.41,
"grad_norm": 0.8928658366203308,
"learning_rate": 6.68973111761823e-06,
"loss": 0.5753,
"step": 6449
},
{
"epoch": 0.41,
"grad_norm": 0.9356186985969543,
"learning_rate": 6.688765452275033e-06,
"loss": 0.6636,
"step": 6450
},
{
"epoch": 0.41,
"grad_norm": 0.8654458522796631,
"learning_rate": 6.687799715823798e-06,
"loss": 0.5351,
"step": 6451
},
{
"epoch": 0.41,
"grad_norm": 0.867955207824707,
"learning_rate": 6.686833908305188e-06,
"loss": 0.6091,
"step": 6452
},
{
"epoch": 0.41,
"grad_norm": 0.8342301845550537,
"learning_rate": 6.68586802975987e-06,
"loss": 0.591,
"step": 6453
},
{
"epoch": 0.41,
"grad_norm": 0.9121977090835571,
"learning_rate": 6.684902080228514e-06,
"loss": 0.5892,
"step": 6454
},
{
"epoch": 0.41,
"grad_norm": 0.9055156111717224,
"learning_rate": 6.6839360597517935e-06,
"loss": 0.5665,
"step": 6455
},
{
"epoch": 0.41,
"grad_norm": 0.8971875905990601,
"learning_rate": 6.682969968370383e-06,
"loss": 0.6021,
"step": 6456
},
{
"epoch": 0.41,
"grad_norm": 0.9293539524078369,
"learning_rate": 6.68200380612496e-06,
"loss": 0.5815,
"step": 6457
},
{
"epoch": 0.41,
"grad_norm": 0.9090824127197266,
"learning_rate": 6.681037573056211e-06,
"loss": 0.5778,
"step": 6458
},
{
"epoch": 0.41,
"grad_norm": 0.9384252429008484,
"learning_rate": 6.6800712692048164e-06,
"loss": 0.5974,
"step": 6459
},
{
"epoch": 0.41,
"grad_norm": 0.9273927211761475,
"learning_rate": 6.679104894611466e-06,
"loss": 0.6242,
"step": 6460
},
{
"epoch": 0.41,
"grad_norm": 0.9325118660926819,
"learning_rate": 6.678138449316848e-06,
"loss": 0.6443,
"step": 6461
},
{
"epoch": 0.41,
"grad_norm": 0.8972262740135193,
"learning_rate": 6.6771719333616584e-06,
"loss": 0.568,
"step": 6462
},
{
"epoch": 0.41,
"grad_norm": 0.828413724899292,
"learning_rate": 6.676205346786594e-06,
"loss": 0.5929,
"step": 6463
},
{
"epoch": 0.41,
"grad_norm": 0.8351660966873169,
"learning_rate": 6.6752386896323526e-06,
"loss": 0.6104,
"step": 6464
},
{
"epoch": 0.41,
"grad_norm": 0.9743680953979492,
"learning_rate": 6.674271961939638e-06,
"loss": 0.6608,
"step": 6465
},
{
"epoch": 0.41,
"grad_norm": 0.8384668231010437,
"learning_rate": 6.673305163749155e-06,
"loss": 0.5683,
"step": 6466
},
{
"epoch": 0.41,
"grad_norm": 0.8962710499763489,
"learning_rate": 6.672338295101614e-06,
"loss": 0.5661,
"step": 6467
},
{
"epoch": 0.41,
"grad_norm": 0.8527003526687622,
"learning_rate": 6.671371356037723e-06,
"loss": 0.6172,
"step": 6468
},
{
"epoch": 0.41,
"grad_norm": 0.967922568321228,
"learning_rate": 6.670404346598199e-06,
"loss": 0.605,
"step": 6469
},
{
"epoch": 0.41,
"grad_norm": 0.8897997736930847,
"learning_rate": 6.669437266823759e-06,
"loss": 0.6087,
"step": 6470
},
{
"epoch": 0.41,
"grad_norm": 0.9014569520950317,
"learning_rate": 6.668470116755125e-06,
"loss": 0.589,
"step": 6471
},
{
"epoch": 0.41,
"grad_norm": 0.8684948086738586,
"learning_rate": 6.6675028964330156e-06,
"loss": 0.5962,
"step": 6472
},
{
"epoch": 0.41,
"grad_norm": 0.8721036911010742,
"learning_rate": 6.666535605898162e-06,
"loss": 0.666,
"step": 6473
},
{
"epoch": 0.41,
"grad_norm": 0.8894490599632263,
"learning_rate": 6.6655682451912915e-06,
"loss": 0.5945,
"step": 6474
},
{
"epoch": 0.41,
"grad_norm": 0.8807538747787476,
"learning_rate": 6.664600814353137e-06,
"loss": 0.6073,
"step": 6475
},
{
"epoch": 0.41,
"grad_norm": 0.9010364413261414,
"learning_rate": 6.6636333134244305e-06,
"loss": 0.5884,
"step": 6476
},
{
"epoch": 0.41,
"grad_norm": 0.8854992985725403,
"learning_rate": 6.662665742445914e-06,
"loss": 0.5852,
"step": 6477
},
{
"epoch": 0.41,
"grad_norm": 0.8660020232200623,
"learning_rate": 6.661698101458327e-06,
"loss": 0.5954,
"step": 6478
},
{
"epoch": 0.41,
"grad_norm": 0.8577721118927002,
"learning_rate": 6.660730390502414e-06,
"loss": 0.5837,
"step": 6479
},
{
"epoch": 0.41,
"grad_norm": 0.8442829251289368,
"learning_rate": 6.6597626096189206e-06,
"loss": 0.619,
"step": 6480
},
{
"epoch": 0.41,
"grad_norm": 0.877422571182251,
"learning_rate": 6.658794758848598e-06,
"loss": 0.6028,
"step": 6481
},
{
"epoch": 0.41,
"grad_norm": 0.880001425743103,
"learning_rate": 6.6578268382322e-06,
"loss": 0.5807,
"step": 6482
},
{
"epoch": 0.41,
"grad_norm": 0.8305491209030151,
"learning_rate": 6.656858847810479e-06,
"loss": 0.5786,
"step": 6483
},
{
"epoch": 0.41,
"grad_norm": 0.8943942785263062,
"learning_rate": 6.655890787624195e-06,
"loss": 0.5586,
"step": 6484
},
{
"epoch": 0.41,
"grad_norm": 0.893250584602356,
"learning_rate": 6.654922657714112e-06,
"loss": 0.5612,
"step": 6485
},
{
"epoch": 0.41,
"grad_norm": 0.9150073528289795,
"learning_rate": 6.6539544581209935e-06,
"loss": 0.6442,
"step": 6486
},
{
"epoch": 0.41,
"grad_norm": 0.8710561394691467,
"learning_rate": 6.652986188885605e-06,
"loss": 0.6025,
"step": 6487
},
{
"epoch": 0.41,
"grad_norm": 0.8758864402770996,
"learning_rate": 6.652017850048719e-06,
"loss": 0.5879,
"step": 6488
},
{
"epoch": 0.41,
"grad_norm": 0.925520658493042,
"learning_rate": 6.651049441651107e-06,
"loss": 0.6254,
"step": 6489
},
{
"epoch": 0.41,
"grad_norm": 0.8633304834365845,
"learning_rate": 6.65008096373355e-06,
"loss": 0.5776,
"step": 6490
},
{
"epoch": 0.41,
"grad_norm": 0.9169586300849915,
"learning_rate": 6.6491124163368215e-06,
"loss": 0.587,
"step": 6491
},
{
"epoch": 0.41,
"grad_norm": 0.815740168094635,
"learning_rate": 6.648143799501705e-06,
"loss": 0.5786,
"step": 6492
},
{
"epoch": 0.41,
"grad_norm": 0.9177011847496033,
"learning_rate": 6.647175113268989e-06,
"loss": 0.5998,
"step": 6493
},
{
"epoch": 0.41,
"grad_norm": 0.9129186868667603,
"learning_rate": 6.646206357679458e-06,
"loss": 0.6242,
"step": 6494
},
{
"epoch": 0.41,
"grad_norm": 0.8686244487762451,
"learning_rate": 6.645237532773902e-06,
"loss": 0.6423,
"step": 6495
},
{
"epoch": 0.41,
"grad_norm": 0.9115392565727234,
"learning_rate": 6.64426863859312e-06,
"loss": 0.6303,
"step": 6496
},
{
"epoch": 0.41,
"grad_norm": 0.9037183523178101,
"learning_rate": 6.643299675177906e-06,
"loss": 0.6147,
"step": 6497
},
{
"epoch": 0.41,
"grad_norm": 0.9037627577781677,
"learning_rate": 6.642330642569056e-06,
"loss": 0.5879,
"step": 6498
},
{
"epoch": 0.41,
"grad_norm": 0.8843808770179749,
"learning_rate": 6.641361540807377e-06,
"loss": 0.5843,
"step": 6499
},
{
"epoch": 0.41,
"grad_norm": 0.9096183180809021,
"learning_rate": 6.640392369933675e-06,
"loss": 0.5984,
"step": 6500
},
{
"epoch": 0.41,
"grad_norm": 0.9090222120285034,
"learning_rate": 6.639423129988756e-06,
"loss": 0.5941,
"step": 6501
},
{
"epoch": 0.41,
"grad_norm": 0.9428609609603882,
"learning_rate": 6.638453821013431e-06,
"loss": 0.6811,
"step": 6502
},
{
"epoch": 0.41,
"grad_norm": 0.8241065144538879,
"learning_rate": 6.637484443048516e-06,
"loss": 0.59,
"step": 6503
},
{
"epoch": 0.41,
"grad_norm": 0.8859769701957703,
"learning_rate": 6.636514996134828e-06,
"loss": 0.555,
"step": 6504
},
{
"epoch": 0.41,
"grad_norm": 0.9394935965538025,
"learning_rate": 6.635545480313187e-06,
"loss": 0.6377,
"step": 6505
},
{
"epoch": 0.41,
"grad_norm": 0.8997877240180969,
"learning_rate": 6.634575895624414e-06,
"loss": 0.66,
"step": 6506
},
{
"epoch": 0.41,
"grad_norm": 0.9390882253646851,
"learning_rate": 6.6336062421093374e-06,
"loss": 0.6316,
"step": 6507
},
{
"epoch": 0.41,
"grad_norm": 0.8696218729019165,
"learning_rate": 6.632636519808785e-06,
"loss": 0.6006,
"step": 6508
},
{
"epoch": 0.41,
"grad_norm": 0.9233937859535217,
"learning_rate": 6.6316667287635875e-06,
"loss": 0.6195,
"step": 6509
},
{
"epoch": 0.41,
"grad_norm": 0.9679466485977173,
"learning_rate": 6.63069686901458e-06,
"loss": 0.6293,
"step": 6510
},
{
"epoch": 0.41,
"grad_norm": 0.8663052320480347,
"learning_rate": 6.629726940602601e-06,
"loss": 0.6051,
"step": 6511
},
{
"epoch": 0.41,
"grad_norm": 0.8438270092010498,
"learning_rate": 6.62875694356849e-06,
"loss": 0.6315,
"step": 6512
},
{
"epoch": 0.41,
"grad_norm": 0.9236611723899841,
"learning_rate": 6.62778687795309e-06,
"loss": 0.564,
"step": 6513
},
{
"epoch": 0.41,
"grad_norm": 0.8759667873382568,
"learning_rate": 6.626816743797246e-06,
"loss": 0.5565,
"step": 6514
},
{
"epoch": 0.41,
"grad_norm": 0.8147273659706116,
"learning_rate": 6.62584654114181e-06,
"loss": 0.5434,
"step": 6515
},
{
"epoch": 0.41,
"grad_norm": 0.8937092423439026,
"learning_rate": 6.6248762700276315e-06,
"loss": 0.6153,
"step": 6516
},
{
"epoch": 0.41,
"grad_norm": 0.8718124032020569,
"learning_rate": 6.623905930495565e-06,
"loss": 0.5902,
"step": 6517
},
{
"epoch": 0.41,
"grad_norm": 1.0065981149673462,
"learning_rate": 6.622935522586469e-06,
"loss": 0.5592,
"step": 6518
},
{
"epoch": 0.41,
"grad_norm": 0.8148283362388611,
"learning_rate": 6.6219650463412034e-06,
"loss": 0.5861,
"step": 6519
},
{
"epoch": 0.41,
"grad_norm": 0.884833574295044,
"learning_rate": 6.620994501800634e-06,
"loss": 0.5903,
"step": 6520
},
{
"epoch": 0.41,
"grad_norm": 0.873306393623352,
"learning_rate": 6.620023889005624e-06,
"loss": 0.6419,
"step": 6521
},
{
"epoch": 0.41,
"grad_norm": 0.9364440441131592,
"learning_rate": 6.619053207997043e-06,
"loss": 0.6001,
"step": 6522
},
{
"epoch": 0.41,
"grad_norm": 0.9023630619049072,
"learning_rate": 6.618082458815765e-06,
"loss": 0.5793,
"step": 6523
},
{
"epoch": 0.41,
"grad_norm": 0.8948296904563904,
"learning_rate": 6.617111641502664e-06,
"loss": 0.58,
"step": 6524
},
{
"epoch": 0.41,
"grad_norm": 0.8921267986297607,
"learning_rate": 6.616140756098617e-06,
"loss": 0.5626,
"step": 6525
},
{
"epoch": 0.41,
"grad_norm": 0.8996078968048096,
"learning_rate": 6.615169802644503e-06,
"loss": 0.6441,
"step": 6526
},
{
"epoch": 0.41,
"grad_norm": 0.992561936378479,
"learning_rate": 6.614198781181209e-06,
"loss": 0.6163,
"step": 6527
},
{
"epoch": 0.41,
"grad_norm": 0.8944520354270935,
"learning_rate": 6.613227691749619e-06,
"loss": 0.6454,
"step": 6528
},
{
"epoch": 0.41,
"grad_norm": 0.9010036587715149,
"learning_rate": 6.612256534390624e-06,
"loss": 0.6159,
"step": 6529
},
{
"epoch": 0.41,
"grad_norm": 0.893017053604126,
"learning_rate": 6.611285309145113e-06,
"loss": 0.5674,
"step": 6530
},
{
"epoch": 0.41,
"grad_norm": 0.849725067615509,
"learning_rate": 6.610314016053986e-06,
"loss": 0.5657,
"step": 6531
},
{
"epoch": 0.41,
"grad_norm": 0.8771043419837952,
"learning_rate": 6.609342655158135e-06,
"loss": 0.6204,
"step": 6532
},
{
"epoch": 0.41,
"grad_norm": 0.9552651643753052,
"learning_rate": 6.608371226498464e-06,
"loss": 0.5664,
"step": 6533
},
{
"epoch": 0.41,
"grad_norm": 0.9059584140777588,
"learning_rate": 6.607399730115875e-06,
"loss": 0.612,
"step": 6534
},
{
"epoch": 0.41,
"grad_norm": 0.8679310083389282,
"learning_rate": 6.6064281660512775e-06,
"loss": 0.5264,
"step": 6535
},
{
"epoch": 0.41,
"grad_norm": 0.9331749081611633,
"learning_rate": 6.6054565343455765e-06,
"loss": 0.6195,
"step": 6536
},
{
"epoch": 0.41,
"grad_norm": 0.8616225719451904,
"learning_rate": 6.604484835039686e-06,
"loss": 0.5867,
"step": 6537
},
{
"epoch": 0.41,
"grad_norm": 0.8593981862068176,
"learning_rate": 6.603513068174521e-06,
"loss": 0.5371,
"step": 6538
},
{
"epoch": 0.41,
"grad_norm": 0.8332374095916748,
"learning_rate": 6.602541233790999e-06,
"loss": 0.5885,
"step": 6539
},
{
"epoch": 0.41,
"grad_norm": 0.9694592356681824,
"learning_rate": 6.601569331930041e-06,
"loss": 0.6602,
"step": 6540
},
{
"epoch": 0.41,
"grad_norm": 0.8334494829177856,
"learning_rate": 6.600597362632568e-06,
"loss": 0.5729,
"step": 6541
},
{
"epoch": 0.41,
"grad_norm": 0.8962088823318481,
"learning_rate": 6.599625325939509e-06,
"loss": 0.6357,
"step": 6542
},
{
"epoch": 0.41,
"grad_norm": 0.9451315402984619,
"learning_rate": 6.598653221891793e-06,
"loss": 0.619,
"step": 6543
},
{
"epoch": 0.41,
"grad_norm": 0.9206660389900208,
"learning_rate": 6.597681050530351e-06,
"loss": 0.6387,
"step": 6544
},
{
"epoch": 0.41,
"grad_norm": 0.8884252905845642,
"learning_rate": 6.596708811896116e-06,
"loss": 0.61,
"step": 6545
},
{
"epoch": 0.41,
"grad_norm": 0.8867802619934082,
"learning_rate": 6.595736506030029e-06,
"loss": 0.6128,
"step": 6546
},
{
"epoch": 0.41,
"grad_norm": 0.8310429453849792,
"learning_rate": 6.59476413297303e-06,
"loss": 0.5547,
"step": 6547
},
{
"epoch": 0.41,
"grad_norm": 0.9180542230606079,
"learning_rate": 6.59379169276606e-06,
"loss": 0.5834,
"step": 6548
},
{
"epoch": 0.41,
"grad_norm": 0.8704774379730225,
"learning_rate": 6.5928191854500644e-06,
"loss": 0.6258,
"step": 6549
},
{
"epoch": 0.41,
"grad_norm": 0.8992159962654114,
"learning_rate": 6.591846611065997e-06,
"loss": 0.5977,
"step": 6550
},
{
"epoch": 0.42,
"grad_norm": 0.8730959892272949,
"learning_rate": 6.590873969654805e-06,
"loss": 0.6295,
"step": 6551
},
{
"epoch": 0.42,
"grad_norm": 0.9002555012702942,
"learning_rate": 6.589901261257445e-06,
"loss": 0.6144,
"step": 6552
},
{
"epoch": 0.42,
"grad_norm": 0.9010111093521118,
"learning_rate": 6.588928485914871e-06,
"loss": 0.6662,
"step": 6553
},
{
"epoch": 0.42,
"grad_norm": 0.8698523044586182,
"learning_rate": 6.587955643668049e-06,
"loss": 0.6023,
"step": 6554
},
{
"epoch": 0.42,
"grad_norm": 0.9001327753067017,
"learning_rate": 6.58698273455794e-06,
"loss": 0.5513,
"step": 6555
},
{
"epoch": 0.42,
"grad_norm": 0.9791713953018188,
"learning_rate": 6.586009758625507e-06,
"loss": 0.6439,
"step": 6556
},
{
"epoch": 0.42,
"grad_norm": 0.9007930159568787,
"learning_rate": 6.585036715911719e-06,
"loss": 0.5951,
"step": 6557
},
{
"epoch": 0.42,
"grad_norm": 0.8678936958312988,
"learning_rate": 6.58406360645755e-06,
"loss": 0.6341,
"step": 6558
},
{
"epoch": 0.42,
"grad_norm": 0.8232488036155701,
"learning_rate": 6.583090430303975e-06,
"loss": 0.5596,
"step": 6559
},
{
"epoch": 0.42,
"grad_norm": 0.8638771772384644,
"learning_rate": 6.582117187491967e-06,
"loss": 0.6657,
"step": 6560
},
{
"epoch": 0.42,
"grad_norm": 0.9044223427772522,
"learning_rate": 6.581143878062507e-06,
"loss": 0.6091,
"step": 6561
},
{
"epoch": 0.42,
"grad_norm": 0.9236576557159424,
"learning_rate": 6.58017050205658e-06,
"loss": 0.5625,
"step": 6562
},
{
"epoch": 0.42,
"grad_norm": 0.9195695519447327,
"learning_rate": 6.5791970595151714e-06,
"loss": 0.6354,
"step": 6563
},
{
"epoch": 0.42,
"grad_norm": 0.8628154993057251,
"learning_rate": 6.578223550479266e-06,
"loss": 0.6041,
"step": 6564
},
{
"epoch": 0.42,
"grad_norm": 0.779208779335022,
"learning_rate": 6.5772499749898585e-06,
"loss": 0.555,
"step": 6565
},
{
"epoch": 0.42,
"grad_norm": 1.0027345418930054,
"learning_rate": 6.576276333087941e-06,
"loss": 0.6062,
"step": 6566
},
{
"epoch": 0.42,
"grad_norm": 0.8687159419059753,
"learning_rate": 6.575302624814512e-06,
"loss": 0.5796,
"step": 6567
},
{
"epoch": 0.42,
"grad_norm": 0.9297102093696594,
"learning_rate": 6.5743288502105675e-06,
"loss": 0.5697,
"step": 6568
},
{
"epoch": 0.42,
"grad_norm": 0.9116488099098206,
"learning_rate": 6.5733550093171115e-06,
"loss": 0.5924,
"step": 6569
},
{
"epoch": 0.42,
"grad_norm": 0.9103240370750427,
"learning_rate": 6.572381102175151e-06,
"loss": 0.5937,
"step": 6570
},
{
"epoch": 0.42,
"grad_norm": 0.8994322419166565,
"learning_rate": 6.571407128825692e-06,
"loss": 0.6327,
"step": 6571
},
{
"epoch": 0.42,
"grad_norm": 0.9073256850242615,
"learning_rate": 6.570433089309745e-06,
"loss": 0.5543,
"step": 6572
},
{
"epoch": 0.42,
"grad_norm": 0.902117133140564,
"learning_rate": 6.569458983668323e-06,
"loss": 0.5465,
"step": 6573
},
{
"epoch": 0.42,
"grad_norm": 0.8766512870788574,
"learning_rate": 6.5684848119424435e-06,
"loss": 0.6265,
"step": 6574
},
{
"epoch": 0.42,
"grad_norm": 0.8914992213249207,
"learning_rate": 6.567510574173126e-06,
"loss": 0.6145,
"step": 6575
},
{
"epoch": 0.42,
"grad_norm": 0.9038758277893066,
"learning_rate": 6.566536270401389e-06,
"loss": 0.6472,
"step": 6576
},
{
"epoch": 0.42,
"grad_norm": 0.9073125123977661,
"learning_rate": 6.5655619006682604e-06,
"loss": 0.6354,
"step": 6577
},
{
"epoch": 0.42,
"grad_norm": 0.8908482193946838,
"learning_rate": 6.5645874650147676e-06,
"loss": 0.603,
"step": 6578
},
{
"epoch": 0.42,
"grad_norm": 0.9067582488059998,
"learning_rate": 6.563612963481938e-06,
"loss": 0.5947,
"step": 6579
},
{
"epoch": 0.42,
"grad_norm": 0.926496148109436,
"learning_rate": 6.562638396110805e-06,
"loss": 0.5645,
"step": 6580
},
{
"epoch": 0.42,
"grad_norm": 1.002769112586975,
"learning_rate": 6.561663762942407e-06,
"loss": 0.6122,
"step": 6581
},
{
"epoch": 0.42,
"grad_norm": 1.0203826427459717,
"learning_rate": 6.560689064017781e-06,
"loss": 0.5591,
"step": 6582
},
{
"epoch": 0.42,
"grad_norm": 0.9004783034324646,
"learning_rate": 6.559714299377966e-06,
"loss": 0.6435,
"step": 6583
},
{
"epoch": 0.42,
"grad_norm": 0.8488182425498962,
"learning_rate": 6.558739469064008e-06,
"loss": 0.6716,
"step": 6584
},
{
"epoch": 0.42,
"grad_norm": 0.9152944087982178,
"learning_rate": 6.5577645731169535e-06,
"loss": 0.6506,
"step": 6585
},
{
"epoch": 0.42,
"grad_norm": 1.0391813516616821,
"learning_rate": 6.556789611577854e-06,
"loss": 0.6031,
"step": 6586
},
{
"epoch": 0.42,
"grad_norm": 0.8500710129737854,
"learning_rate": 6.555814584487757e-06,
"loss": 0.604,
"step": 6587
},
{
"epoch": 0.42,
"grad_norm": 0.9001255631446838,
"learning_rate": 6.5548394918877216e-06,
"loss": 0.6199,
"step": 6588
},
{
"epoch": 0.42,
"grad_norm": 0.8187232613563538,
"learning_rate": 6.553864333818803e-06,
"loss": 0.5698,
"step": 6589
},
{
"epoch": 0.42,
"grad_norm": 0.8661369681358337,
"learning_rate": 6.552889110322062e-06,
"loss": 0.568,
"step": 6590
},
{
"epoch": 0.42,
"grad_norm": 0.9107438325881958,
"learning_rate": 6.551913821438565e-06,
"loss": 0.5363,
"step": 6591
},
{
"epoch": 0.42,
"grad_norm": 0.8906565308570862,
"learning_rate": 6.550938467209375e-06,
"loss": 0.5933,
"step": 6592
},
{
"epoch": 0.42,
"grad_norm": 0.865942656993866,
"learning_rate": 6.5499630476755616e-06,
"loss": 0.5402,
"step": 6593
},
{
"epoch": 0.42,
"grad_norm": 0.862751305103302,
"learning_rate": 6.548987562878195e-06,
"loss": 0.5899,
"step": 6594
},
{
"epoch": 0.42,
"grad_norm": 0.8719815015792847,
"learning_rate": 6.548012012858352e-06,
"loss": 0.5804,
"step": 6595
},
{
"epoch": 0.42,
"grad_norm": 0.9156465530395508,
"learning_rate": 6.547036397657106e-06,
"loss": 0.5877,
"step": 6596
},
{
"epoch": 0.42,
"grad_norm": 0.9153836965560913,
"learning_rate": 6.546060717315542e-06,
"loss": 0.6035,
"step": 6597
},
{
"epoch": 0.42,
"grad_norm": 0.9377827644348145,
"learning_rate": 6.545084971874738e-06,
"loss": 0.6147,
"step": 6598
},
{
"epoch": 0.42,
"grad_norm": 0.9083762168884277,
"learning_rate": 6.5441091613757805e-06,
"loss": 0.5889,
"step": 6599
},
{
"epoch": 0.42,
"grad_norm": 0.870985746383667,
"learning_rate": 6.543133285859758e-06,
"loss": 0.5776,
"step": 6600
},
{
"epoch": 0.42,
"grad_norm": 0.8286287188529968,
"learning_rate": 6.542157345367763e-06,
"loss": 0.5341,
"step": 6601
},
{
"epoch": 0.42,
"grad_norm": 0.9526362419128418,
"learning_rate": 6.5411813399408845e-06,
"loss": 0.6605,
"step": 6602
},
{
"epoch": 0.42,
"grad_norm": 0.8954978585243225,
"learning_rate": 6.540205269620221e-06,
"loss": 0.6206,
"step": 6603
},
{
"epoch": 0.42,
"grad_norm": 0.8499834537506104,
"learning_rate": 6.539229134446874e-06,
"loss": 0.5997,
"step": 6604
},
{
"epoch": 0.42,
"grad_norm": 0.9309713840484619,
"learning_rate": 6.538252934461941e-06,
"loss": 0.5951,
"step": 6605
},
{
"epoch": 0.42,
"grad_norm": 0.9219299554824829,
"learning_rate": 6.537276669706527e-06,
"loss": 0.6351,
"step": 6606
},
{
"epoch": 0.42,
"grad_norm": 0.8586229085922241,
"learning_rate": 6.536300340221742e-06,
"loss": 0.6374,
"step": 6607
},
{
"epoch": 0.42,
"grad_norm": 0.8765944242477417,
"learning_rate": 6.535323946048695e-06,
"loss": 0.5754,
"step": 6608
},
{
"epoch": 0.42,
"grad_norm": 0.8662661910057068,
"learning_rate": 6.534347487228495e-06,
"loss": 0.5382,
"step": 6609
},
{
"epoch": 0.42,
"grad_norm": 0.8375385999679565,
"learning_rate": 6.533370963802261e-06,
"loss": 0.5487,
"step": 6610
},
{
"epoch": 0.42,
"grad_norm": 0.9295637011528015,
"learning_rate": 6.532394375811111e-06,
"loss": 0.6287,
"step": 6611
},
{
"epoch": 0.42,
"grad_norm": 0.8826519250869751,
"learning_rate": 6.531417723296164e-06,
"loss": 0.5982,
"step": 6612
},
{
"epoch": 0.42,
"grad_norm": 0.9090942740440369,
"learning_rate": 6.530441006298544e-06,
"loss": 0.6041,
"step": 6613
},
{
"epoch": 0.42,
"grad_norm": 0.8962193131446838,
"learning_rate": 6.5294642248593765e-06,
"loss": 0.6588,
"step": 6614
},
{
"epoch": 0.42,
"grad_norm": 0.9374716281890869,
"learning_rate": 6.528487379019791e-06,
"loss": 0.5957,
"step": 6615
},
{
"epoch": 0.42,
"grad_norm": 0.8533490300178528,
"learning_rate": 6.5275104688209215e-06,
"loss": 0.567,
"step": 6616
},
{
"epoch": 0.42,
"grad_norm": 0.9722062349319458,
"learning_rate": 6.526533494303898e-06,
"loss": 0.65,
"step": 6617
},
{
"epoch": 0.42,
"grad_norm": 0.9122631549835205,
"learning_rate": 6.525556455509858e-06,
"loss": 0.6388,
"step": 6618
},
{
"epoch": 0.42,
"grad_norm": 0.8122672438621521,
"learning_rate": 6.5245793524799465e-06,
"loss": 0.5713,
"step": 6619
},
{
"epoch": 0.42,
"grad_norm": 0.8519312739372253,
"learning_rate": 6.5236021852553e-06,
"loss": 0.633,
"step": 6620
},
{
"epoch": 0.42,
"grad_norm": 0.9361836910247803,
"learning_rate": 6.522624953877066e-06,
"loss": 0.6151,
"step": 6621
},
{
"epoch": 0.42,
"grad_norm": 0.832336962223053,
"learning_rate": 6.52164765838639e-06,
"loss": 0.545,
"step": 6622
},
{
"epoch": 0.42,
"grad_norm": 0.8808085322380066,
"learning_rate": 6.520670298824428e-06,
"loss": 0.5869,
"step": 6623
},
{
"epoch": 0.42,
"grad_norm": 0.9161610007286072,
"learning_rate": 6.519692875232328e-06,
"loss": 0.5429,
"step": 6624
},
{
"epoch": 0.42,
"grad_norm": 0.9123284220695496,
"learning_rate": 6.518715387651249e-06,
"loss": 0.6242,
"step": 6625
},
{
"epoch": 0.42,
"grad_norm": 0.8876476883888245,
"learning_rate": 6.517737836122345e-06,
"loss": 0.6074,
"step": 6626
},
{
"epoch": 0.42,
"grad_norm": 0.8674134016036987,
"learning_rate": 6.516760220686783e-06,
"loss": 0.6166,
"step": 6627
},
{
"epoch": 0.42,
"grad_norm": 0.9016671776771545,
"learning_rate": 6.515782541385725e-06,
"loss": 0.6589,
"step": 6628
},
{
"epoch": 0.42,
"grad_norm": 0.9042580127716064,
"learning_rate": 6.514804798260337e-06,
"loss": 0.6446,
"step": 6629
},
{
"epoch": 0.42,
"grad_norm": 0.8501339554786682,
"learning_rate": 6.513826991351786e-06,
"loss": 0.6305,
"step": 6630
},
{
"epoch": 0.42,
"grad_norm": 0.8709739446640015,
"learning_rate": 6.512849120701249e-06,
"loss": 0.5833,
"step": 6631
},
{
"epoch": 0.42,
"grad_norm": 0.8446660041809082,
"learning_rate": 6.511871186349897e-06,
"loss": 0.5731,
"step": 6632
},
{
"epoch": 0.42,
"grad_norm": 0.8911131620407104,
"learning_rate": 6.510893188338911e-06,
"loss": 0.6193,
"step": 6633
},
{
"epoch": 0.42,
"grad_norm": 0.8830543756484985,
"learning_rate": 6.509915126709467e-06,
"loss": 0.5538,
"step": 6634
},
{
"epoch": 0.42,
"grad_norm": 0.9163636565208435,
"learning_rate": 6.50893700150275e-06,
"loss": 0.5837,
"step": 6635
},
{
"epoch": 0.42,
"grad_norm": 0.8984601497650146,
"learning_rate": 6.5079588127599455e-06,
"loss": 0.5719,
"step": 6636
},
{
"epoch": 0.42,
"grad_norm": 0.8621581196784973,
"learning_rate": 6.50698056052224e-06,
"loss": 0.5852,
"step": 6637
},
{
"epoch": 0.42,
"grad_norm": 0.837208092212677,
"learning_rate": 6.506002244830827e-06,
"loss": 0.5823,
"step": 6638
},
{
"epoch": 0.42,
"grad_norm": 0.8576619029045105,
"learning_rate": 6.505023865726898e-06,
"loss": 0.5895,
"step": 6639
},
{
"epoch": 0.42,
"grad_norm": 0.8191852569580078,
"learning_rate": 6.50404542325165e-06,
"loss": 0.5928,
"step": 6640
},
{
"epoch": 0.42,
"grad_norm": 0.9108313322067261,
"learning_rate": 6.503066917446279e-06,
"loss": 0.5934,
"step": 6641
},
{
"epoch": 0.42,
"grad_norm": 0.867838442325592,
"learning_rate": 6.502088348351992e-06,
"loss": 0.5703,
"step": 6642
},
{
"epoch": 0.42,
"grad_norm": 0.8657822608947754,
"learning_rate": 6.501109716009988e-06,
"loss": 0.5505,
"step": 6643
},
{
"epoch": 0.42,
"grad_norm": 0.8648907542228699,
"learning_rate": 6.500131020461477e-06,
"loss": 0.5656,
"step": 6644
},
{
"epoch": 0.42,
"grad_norm": 0.8719663619995117,
"learning_rate": 6.4991522617476666e-06,
"loss": 0.5562,
"step": 6645
},
{
"epoch": 0.42,
"grad_norm": 0.8621832728385925,
"learning_rate": 6.498173439909771e-06,
"loss": 0.6282,
"step": 6646
},
{
"epoch": 0.42,
"grad_norm": 0.8455116748809814,
"learning_rate": 6.497194554989001e-06,
"loss": 0.5634,
"step": 6647
},
{
"epoch": 0.42,
"grad_norm": 0.8869051933288574,
"learning_rate": 6.496215607026579e-06,
"loss": 0.5434,
"step": 6648
},
{
"epoch": 0.42,
"grad_norm": 0.8986787796020508,
"learning_rate": 6.495236596063722e-06,
"loss": 0.5891,
"step": 6649
},
{
"epoch": 0.42,
"grad_norm": 0.8959378004074097,
"learning_rate": 6.494257522141654e-06,
"loss": 0.5889,
"step": 6650
},
{
"epoch": 0.42,
"grad_norm": 1.0358039140701294,
"learning_rate": 6.4932783853016005e-06,
"loss": 0.6392,
"step": 6651
},
{
"epoch": 0.42,
"grad_norm": 0.8593500256538391,
"learning_rate": 6.492299185584787e-06,
"loss": 0.5139,
"step": 6652
},
{
"epoch": 0.42,
"grad_norm": 0.9131850600242615,
"learning_rate": 6.491319923032446e-06,
"loss": 0.5909,
"step": 6653
},
{
"epoch": 0.42,
"grad_norm": 0.8696557283401489,
"learning_rate": 6.490340597685811e-06,
"loss": 0.5991,
"step": 6654
},
{
"epoch": 0.42,
"grad_norm": 0.8535944819450378,
"learning_rate": 6.48936120958612e-06,
"loss": 0.5772,
"step": 6655
},
{
"epoch": 0.42,
"grad_norm": 0.8401122689247131,
"learning_rate": 6.488381758774609e-06,
"loss": 0.6119,
"step": 6656
},
{
"epoch": 0.42,
"grad_norm": 0.8727520704269409,
"learning_rate": 6.487402245292518e-06,
"loss": 0.5869,
"step": 6657
},
{
"epoch": 0.42,
"grad_norm": 0.9223040342330933,
"learning_rate": 6.486422669181094e-06,
"loss": 0.6218,
"step": 6658
},
{
"epoch": 0.42,
"grad_norm": 0.868571937084198,
"learning_rate": 6.485443030481583e-06,
"loss": 0.607,
"step": 6659
},
{
"epoch": 0.42,
"grad_norm": 0.9342830777168274,
"learning_rate": 6.4844633292352335e-06,
"loss": 0.6237,
"step": 6660
},
{
"epoch": 0.42,
"grad_norm": 0.8326634764671326,
"learning_rate": 6.483483565483295e-06,
"loss": 0.5574,
"step": 6661
},
{
"epoch": 0.42,
"grad_norm": 0.8713539242744446,
"learning_rate": 6.482503739267026e-06,
"loss": 0.5629,
"step": 6662
},
{
"epoch": 0.42,
"grad_norm": 0.8934717178344727,
"learning_rate": 6.481523850627682e-06,
"loss": 0.63,
"step": 6663
},
{
"epoch": 0.42,
"grad_norm": 0.9452871084213257,
"learning_rate": 6.4805438996065215e-06,
"loss": 0.6842,
"step": 6664
},
{
"epoch": 0.42,
"grad_norm": 0.8760863542556763,
"learning_rate": 6.479563886244809e-06,
"loss": 0.6127,
"step": 6665
},
{
"epoch": 0.42,
"grad_norm": 0.901567280292511,
"learning_rate": 6.478583810583807e-06,
"loss": 0.5937,
"step": 6666
},
{
"epoch": 0.42,
"grad_norm": 0.9208518266677856,
"learning_rate": 6.477603672664785e-06,
"loss": 0.5968,
"step": 6667
},
{
"epoch": 0.42,
"grad_norm": 0.8600721955299377,
"learning_rate": 6.476623472529012e-06,
"loss": 0.5893,
"step": 6668
},
{
"epoch": 0.42,
"grad_norm": 0.8686032295227051,
"learning_rate": 6.475643210217762e-06,
"loss": 0.6386,
"step": 6669
},
{
"epoch": 0.42,
"grad_norm": 0.9031897187232971,
"learning_rate": 6.47466288577231e-06,
"loss": 0.6378,
"step": 6670
},
{
"epoch": 0.42,
"grad_norm": 0.82213294506073,
"learning_rate": 6.473682499233934e-06,
"loss": 0.5587,
"step": 6671
},
{
"epoch": 0.42,
"grad_norm": 0.83423912525177,
"learning_rate": 6.472702050643913e-06,
"loss": 0.5684,
"step": 6672
},
{
"epoch": 0.42,
"grad_norm": 0.9469904899597168,
"learning_rate": 6.471721540043533e-06,
"loss": 0.5984,
"step": 6673
},
{
"epoch": 0.42,
"grad_norm": 0.9193524718284607,
"learning_rate": 6.47074096747408e-06,
"loss": 0.6047,
"step": 6674
},
{
"epoch": 0.42,
"grad_norm": 0.8525941967964172,
"learning_rate": 6.469760332976839e-06,
"loss": 0.5274,
"step": 6675
},
{
"epoch": 0.42,
"grad_norm": 0.9107722043991089,
"learning_rate": 6.4687796365931035e-06,
"loss": 0.5995,
"step": 6676
},
{
"epoch": 0.42,
"grad_norm": 0.8921878933906555,
"learning_rate": 6.467798878364168e-06,
"loss": 0.589,
"step": 6677
},
{
"epoch": 0.42,
"grad_norm": 0.8674684166908264,
"learning_rate": 6.466818058331328e-06,
"loss": 0.5953,
"step": 6678
},
{
"epoch": 0.42,
"grad_norm": 0.9107003211975098,
"learning_rate": 6.465837176535881e-06,
"loss": 0.5684,
"step": 6679
},
{
"epoch": 0.42,
"grad_norm": 0.8787494897842407,
"learning_rate": 6.46485623301913e-06,
"loss": 0.5651,
"step": 6680
},
{
"epoch": 0.42,
"grad_norm": 0.8581385016441345,
"learning_rate": 6.46387522782238e-06,
"loss": 0.6103,
"step": 6681
},
{
"epoch": 0.42,
"grad_norm": 0.8561550378799438,
"learning_rate": 6.462894160986937e-06,
"loss": 0.6005,
"step": 6682
},
{
"epoch": 0.42,
"grad_norm": 0.912476122379303,
"learning_rate": 6.461913032554108e-06,
"loss": 0.5908,
"step": 6683
},
{
"epoch": 0.42,
"grad_norm": 0.8894026875495911,
"learning_rate": 6.460931842565207e-06,
"loss": 0.59,
"step": 6684
},
{
"epoch": 0.42,
"grad_norm": 0.8487771153450012,
"learning_rate": 6.4599505910615505e-06,
"loss": 0.5819,
"step": 6685
},
{
"epoch": 0.42,
"grad_norm": 0.8792235255241394,
"learning_rate": 6.45896927808445e-06,
"loss": 0.5918,
"step": 6686
},
{
"epoch": 0.42,
"grad_norm": 0.8825286626815796,
"learning_rate": 6.4579879036752315e-06,
"loss": 0.6375,
"step": 6687
},
{
"epoch": 0.42,
"grad_norm": 0.857980489730835,
"learning_rate": 6.457006467875213e-06,
"loss": 0.5757,
"step": 6688
},
{
"epoch": 0.42,
"grad_norm": 0.9466935396194458,
"learning_rate": 6.456024970725722e-06,
"loss": 0.5619,
"step": 6689
},
{
"epoch": 0.42,
"grad_norm": 0.9351262450218201,
"learning_rate": 6.455043412268083e-06,
"loss": 0.5754,
"step": 6690
},
{
"epoch": 0.42,
"grad_norm": 0.9212837815284729,
"learning_rate": 6.4540617925436275e-06,
"loss": 0.5439,
"step": 6691
},
{
"epoch": 0.42,
"grad_norm": 0.8872720003128052,
"learning_rate": 6.453080111593689e-06,
"loss": 0.6074,
"step": 6692
},
{
"epoch": 0.42,
"grad_norm": 0.8125371336936951,
"learning_rate": 6.4520983694596025e-06,
"loss": 0.546,
"step": 6693
},
{
"epoch": 0.42,
"grad_norm": 0.9116303324699402,
"learning_rate": 6.451116566182704e-06,
"loss": 0.5957,
"step": 6694
},
{
"epoch": 0.42,
"grad_norm": 0.8791963458061218,
"learning_rate": 6.4501347018043356e-06,
"loss": 0.5968,
"step": 6695
},
{
"epoch": 0.42,
"grad_norm": 0.9115133285522461,
"learning_rate": 6.44915277636584e-06,
"loss": 0.6176,
"step": 6696
},
{
"epoch": 0.42,
"grad_norm": 0.9027078151702881,
"learning_rate": 6.4481707899085624e-06,
"loss": 0.5877,
"step": 6697
},
{
"epoch": 0.42,
"grad_norm": 0.9268640279769897,
"learning_rate": 6.447188742473849e-06,
"loss": 0.6045,
"step": 6698
},
{
"epoch": 0.42,
"grad_norm": 0.9698523879051208,
"learning_rate": 6.446206634103053e-06,
"loss": 0.6128,
"step": 6699
},
{
"epoch": 0.42,
"grad_norm": 0.8391386866569519,
"learning_rate": 6.445224464837527e-06,
"loss": 0.632,
"step": 6700
},
{
"epoch": 0.42,
"grad_norm": 0.8128573894500732,
"learning_rate": 6.444242234718626e-06,
"loss": 0.5394,
"step": 6701
},
{
"epoch": 0.42,
"grad_norm": 0.8981994986534119,
"learning_rate": 6.443259943787708e-06,
"loss": 0.613,
"step": 6702
},
{
"epoch": 0.42,
"grad_norm": 0.8788542747497559,
"learning_rate": 6.442277592086135e-06,
"loss": 0.6071,
"step": 6703
},
{
"epoch": 0.42,
"grad_norm": 0.8909327387809753,
"learning_rate": 6.4412951796552715e-06,
"loss": 0.643,
"step": 6704
},
{
"epoch": 0.42,
"grad_norm": 0.8961040377616882,
"learning_rate": 6.44031270653648e-06,
"loss": 0.6128,
"step": 6705
},
{
"epoch": 0.42,
"grad_norm": 0.9458435773849487,
"learning_rate": 6.4393301727711296e-06,
"loss": 0.6847,
"step": 6706
},
{
"epoch": 0.42,
"grad_norm": 0.9194031357765198,
"learning_rate": 6.438347578400595e-06,
"loss": 0.6484,
"step": 6707
},
{
"epoch": 0.42,
"grad_norm": 0.9502468705177307,
"learning_rate": 6.437364923466247e-06,
"loss": 0.6456,
"step": 6708
},
{
"epoch": 0.43,
"grad_norm": 0.8829242587089539,
"learning_rate": 6.436382208009463e-06,
"loss": 0.64,
"step": 6709
},
{
"epoch": 0.43,
"grad_norm": 0.9908746480941772,
"learning_rate": 6.43539943207162e-06,
"loss": 0.6066,
"step": 6710
},
{
"epoch": 0.43,
"grad_norm": 0.8264694213867188,
"learning_rate": 6.434416595694102e-06,
"loss": 0.5476,
"step": 6711
},
{
"epoch": 0.43,
"grad_norm": 0.8517434000968933,
"learning_rate": 6.433433698918289e-06,
"loss": 0.5964,
"step": 6712
},
{
"epoch": 0.43,
"grad_norm": 0.8980498909950256,
"learning_rate": 6.432450741785571e-06,
"loss": 0.5834,
"step": 6713
},
{
"epoch": 0.43,
"grad_norm": 0.9315409660339355,
"learning_rate": 6.431467724337332e-06,
"loss": 0.5854,
"step": 6714
},
{
"epoch": 0.43,
"grad_norm": 0.9002824425697327,
"learning_rate": 6.430484646614971e-06,
"loss": 0.7002,
"step": 6715
},
{
"epoch": 0.43,
"grad_norm": 0.8017958998680115,
"learning_rate": 6.429501508659877e-06,
"loss": 0.555,
"step": 6716
},
{
"epoch": 0.43,
"grad_norm": 0.87758469581604,
"learning_rate": 6.428518310513446e-06,
"loss": 0.6322,
"step": 6717
},
{
"epoch": 0.43,
"grad_norm": 0.9012987613677979,
"learning_rate": 6.427535052217078e-06,
"loss": 0.6406,
"step": 6718
},
{
"epoch": 0.43,
"grad_norm": 0.8559851050376892,
"learning_rate": 6.4265517338121764e-06,
"loss": 0.6203,
"step": 6719
},
{
"epoch": 0.43,
"grad_norm": 0.8328604102134705,
"learning_rate": 6.4255683553401435e-06,
"loss": 0.6212,
"step": 6720
},
{
"epoch": 0.43,
"grad_norm": 0.8184729814529419,
"learning_rate": 6.424584916842387e-06,
"loss": 0.5646,
"step": 6721
},
{
"epoch": 0.43,
"grad_norm": 0.8883371353149414,
"learning_rate": 6.423601418360314e-06,
"loss": 0.585,
"step": 6722
},
{
"epoch": 0.43,
"grad_norm": 0.9067890644073486,
"learning_rate": 6.4226178599353385e-06,
"loss": 0.6143,
"step": 6723
},
{
"epoch": 0.43,
"grad_norm": 0.9497420787811279,
"learning_rate": 6.421634241608874e-06,
"loss": 0.6008,
"step": 6724
},
{
"epoch": 0.43,
"grad_norm": 0.8506911396980286,
"learning_rate": 6.420650563422337e-06,
"loss": 0.5696,
"step": 6725
},
{
"epoch": 0.43,
"grad_norm": 0.8900071978569031,
"learning_rate": 6.419666825417147e-06,
"loss": 0.6127,
"step": 6726
},
{
"epoch": 0.43,
"grad_norm": 0.8769127726554871,
"learning_rate": 6.4186830276347246e-06,
"loss": 0.582,
"step": 6727
},
{
"epoch": 0.43,
"grad_norm": 0.9167845249176025,
"learning_rate": 6.417699170116497e-06,
"loss": 0.6236,
"step": 6728
},
{
"epoch": 0.43,
"grad_norm": 0.8981141448020935,
"learning_rate": 6.416715252903888e-06,
"loss": 0.6296,
"step": 6729
},
{
"epoch": 0.43,
"grad_norm": 0.8629951477050781,
"learning_rate": 6.415731276038327e-06,
"loss": 0.5923,
"step": 6730
},
{
"epoch": 0.43,
"grad_norm": 0.8666777014732361,
"learning_rate": 6.414747239561249e-06,
"loss": 0.616,
"step": 6731
},
{
"epoch": 0.43,
"grad_norm": 0.9089431762695312,
"learning_rate": 6.413763143514086e-06,
"loss": 0.6325,
"step": 6732
},
{
"epoch": 0.43,
"grad_norm": 0.9420037865638733,
"learning_rate": 6.412778987938273e-06,
"loss": 0.6228,
"step": 6733
},
{
"epoch": 0.43,
"grad_norm": 0.9398850202560425,
"learning_rate": 6.411794772875253e-06,
"loss": 0.6237,
"step": 6734
},
{
"epoch": 0.43,
"grad_norm": 0.842210054397583,
"learning_rate": 6.4108104983664665e-06,
"loss": 0.5431,
"step": 6735
},
{
"epoch": 0.43,
"grad_norm": 0.8711762428283691,
"learning_rate": 6.409826164453359e-06,
"loss": 0.5273,
"step": 6736
},
{
"epoch": 0.43,
"grad_norm": 0.9114711284637451,
"learning_rate": 6.408841771177373e-06,
"loss": 0.6513,
"step": 6737
},
{
"epoch": 0.43,
"grad_norm": 0.86746746301651,
"learning_rate": 6.407857318579963e-06,
"loss": 0.5329,
"step": 6738
},
{
"epoch": 0.43,
"grad_norm": 0.8684642314910889,
"learning_rate": 6.4068728067025785e-06,
"loss": 0.6305,
"step": 6739
},
{
"epoch": 0.43,
"grad_norm": 0.8810404539108276,
"learning_rate": 6.405888235586676e-06,
"loss": 0.5681,
"step": 6740
},
{
"epoch": 0.43,
"grad_norm": 0.9271003603935242,
"learning_rate": 6.4049036052737065e-06,
"loss": 0.6079,
"step": 6741
},
{
"epoch": 0.43,
"grad_norm": 0.9591821432113647,
"learning_rate": 6.403918915805138e-06,
"loss": 0.5997,
"step": 6742
},
{
"epoch": 0.43,
"grad_norm": 0.939398467540741,
"learning_rate": 6.402934167222427e-06,
"loss": 0.6271,
"step": 6743
},
{
"epoch": 0.43,
"grad_norm": 0.8856723308563232,
"learning_rate": 6.4019493595670365e-06,
"loss": 0.5538,
"step": 6744
},
{
"epoch": 0.43,
"grad_norm": 0.8420456051826477,
"learning_rate": 6.400964492880437e-06,
"loss": 0.6144,
"step": 6745
},
{
"epoch": 0.43,
"grad_norm": 0.9057135581970215,
"learning_rate": 6.399979567204096e-06,
"loss": 0.6294,
"step": 6746
},
{
"epoch": 0.43,
"grad_norm": 0.8914698958396912,
"learning_rate": 6.398994582579485e-06,
"loss": 0.6168,
"step": 6747
},
{
"epoch": 0.43,
"grad_norm": 0.7790830135345459,
"learning_rate": 6.39800953904808e-06,
"loss": 0.563,
"step": 6748
},
{
"epoch": 0.43,
"grad_norm": 0.8107707500457764,
"learning_rate": 6.397024436651356e-06,
"loss": 0.5594,
"step": 6749
},
{
"epoch": 0.43,
"grad_norm": 0.9206598401069641,
"learning_rate": 6.396039275430792e-06,
"loss": 0.6187,
"step": 6750
},
{
"epoch": 0.43,
"grad_norm": 0.973175048828125,
"learning_rate": 6.395054055427872e-06,
"loss": 0.6636,
"step": 6751
},
{
"epoch": 0.43,
"grad_norm": 0.8376911878585815,
"learning_rate": 6.394068776684078e-06,
"loss": 0.5941,
"step": 6752
},
{
"epoch": 0.43,
"grad_norm": 0.8403307795524597,
"learning_rate": 6.393083439240897e-06,
"loss": 0.5494,
"step": 6753
},
{
"epoch": 0.43,
"grad_norm": 0.8593806028366089,
"learning_rate": 6.39209804313982e-06,
"loss": 0.5894,
"step": 6754
},
{
"epoch": 0.43,
"grad_norm": 0.81999671459198,
"learning_rate": 6.391112588422337e-06,
"loss": 0.5574,
"step": 6755
},
{
"epoch": 0.43,
"grad_norm": 0.917597770690918,
"learning_rate": 6.390127075129941e-06,
"loss": 0.556,
"step": 6756
},
{
"epoch": 0.43,
"grad_norm": 0.9527899026870728,
"learning_rate": 6.38914150330413e-06,
"loss": 0.6124,
"step": 6757
},
{
"epoch": 0.43,
"grad_norm": 0.9341422319412231,
"learning_rate": 6.388155872986404e-06,
"loss": 0.5947,
"step": 6758
},
{
"epoch": 0.43,
"grad_norm": 0.8585883378982544,
"learning_rate": 6.3871701842182625e-06,
"loss": 0.579,
"step": 6759
},
{
"epoch": 0.43,
"grad_norm": 0.8847423195838928,
"learning_rate": 6.386184437041208e-06,
"loss": 0.6301,
"step": 6760
},
{
"epoch": 0.43,
"grad_norm": 0.9007404446601868,
"learning_rate": 6.385198631496752e-06,
"loss": 0.6414,
"step": 6761
},
{
"epoch": 0.43,
"grad_norm": 0.8374068140983582,
"learning_rate": 6.3842127676263995e-06,
"loss": 0.5746,
"step": 6762
},
{
"epoch": 0.43,
"grad_norm": 0.8941364884376526,
"learning_rate": 6.383226845471663e-06,
"loss": 0.6523,
"step": 6763
},
{
"epoch": 0.43,
"grad_norm": 0.8259331583976746,
"learning_rate": 6.382240865074055e-06,
"loss": 0.5926,
"step": 6764
},
{
"epoch": 0.43,
"grad_norm": 0.8802745342254639,
"learning_rate": 6.381254826475093e-06,
"loss": 0.5734,
"step": 6765
},
{
"epoch": 0.43,
"grad_norm": 0.8371772766113281,
"learning_rate": 6.380268729716296e-06,
"loss": 0.634,
"step": 6766
},
{
"epoch": 0.43,
"grad_norm": 0.9037656188011169,
"learning_rate": 6.379282574839184e-06,
"loss": 0.6121,
"step": 6767
},
{
"epoch": 0.43,
"grad_norm": 0.8730578422546387,
"learning_rate": 6.37829636188528e-06,
"loss": 0.5665,
"step": 6768
},
{
"epoch": 0.43,
"grad_norm": 0.9292997717857361,
"learning_rate": 6.377310090896112e-06,
"loss": 0.5958,
"step": 6769
},
{
"epoch": 0.43,
"grad_norm": 0.9569960236549377,
"learning_rate": 6.376323761913208e-06,
"loss": 0.5732,
"step": 6770
},
{
"epoch": 0.43,
"grad_norm": 0.8929063081741333,
"learning_rate": 6.375337374978097e-06,
"loss": 0.6509,
"step": 6771
},
{
"epoch": 0.43,
"grad_norm": 0.9145770072937012,
"learning_rate": 6.374350930132313e-06,
"loss": 0.6502,
"step": 6772
},
{
"epoch": 0.43,
"grad_norm": 0.8785668015480042,
"learning_rate": 6.373364427417395e-06,
"loss": 0.6297,
"step": 6773
},
{
"epoch": 0.43,
"grad_norm": 0.8315816521644592,
"learning_rate": 6.372377866874876e-06,
"loss": 0.5375,
"step": 6774
},
{
"epoch": 0.43,
"grad_norm": 0.8660714030265808,
"learning_rate": 6.371391248546299e-06,
"loss": 0.5814,
"step": 6775
},
{
"epoch": 0.43,
"grad_norm": 0.801703691482544,
"learning_rate": 6.370404572473209e-06,
"loss": 0.6316,
"step": 6776
},
{
"epoch": 0.43,
"grad_norm": 0.9656221270561218,
"learning_rate": 6.36941783869715e-06,
"loss": 0.6798,
"step": 6777
},
{
"epoch": 0.43,
"grad_norm": 0.8311265707015991,
"learning_rate": 6.368431047259668e-06,
"loss": 0.5343,
"step": 6778
},
{
"epoch": 0.43,
"grad_norm": 0.9228345155715942,
"learning_rate": 6.367444198202315e-06,
"loss": 0.6175,
"step": 6779
},
{
"epoch": 0.43,
"grad_norm": 0.9040692448616028,
"learning_rate": 6.366457291566645e-06,
"loss": 0.6427,
"step": 6780
},
{
"epoch": 0.43,
"grad_norm": 0.8820178508758545,
"learning_rate": 6.365470327394212e-06,
"loss": 0.6016,
"step": 6781
},
{
"epoch": 0.43,
"grad_norm": 0.8262830376625061,
"learning_rate": 6.3644833057265735e-06,
"loss": 0.615,
"step": 6782
},
{
"epoch": 0.43,
"grad_norm": 0.8388856053352356,
"learning_rate": 6.363496226605289e-06,
"loss": 0.5929,
"step": 6783
},
{
"epoch": 0.43,
"grad_norm": 0.9199455380439758,
"learning_rate": 6.362509090071922e-06,
"loss": 0.5725,
"step": 6784
},
{
"epoch": 0.43,
"grad_norm": 0.9267382621765137,
"learning_rate": 6.361521896168037e-06,
"loss": 0.6032,
"step": 6785
},
{
"epoch": 0.43,
"grad_norm": 0.858314573764801,
"learning_rate": 6.360534644935201e-06,
"loss": 0.6036,
"step": 6786
},
{
"epoch": 0.43,
"grad_norm": 0.8850862979888916,
"learning_rate": 6.359547336414985e-06,
"loss": 0.5966,
"step": 6787
},
{
"epoch": 0.43,
"grad_norm": 0.910456657409668,
"learning_rate": 6.358559970648958e-06,
"loss": 0.6172,
"step": 6788
},
{
"epoch": 0.43,
"grad_norm": 0.8374682068824768,
"learning_rate": 6.357572547678701e-06,
"loss": 0.5973,
"step": 6789
},
{
"epoch": 0.43,
"grad_norm": 0.8853792548179626,
"learning_rate": 6.356585067545784e-06,
"loss": 0.5811,
"step": 6790
},
{
"epoch": 0.43,
"grad_norm": 0.8989521861076355,
"learning_rate": 6.355597530291788e-06,
"loss": 0.6074,
"step": 6791
},
{
"epoch": 0.43,
"grad_norm": 0.9015896916389465,
"learning_rate": 6.354609935958298e-06,
"loss": 0.5856,
"step": 6792
},
{
"epoch": 0.43,
"grad_norm": 0.937824010848999,
"learning_rate": 6.3536222845868934e-06,
"loss": 0.6377,
"step": 6793
},
{
"epoch": 0.43,
"grad_norm": 0.9121703505516052,
"learning_rate": 6.3526345762191656e-06,
"loss": 0.6304,
"step": 6794
},
{
"epoch": 0.43,
"grad_norm": 0.8476263284683228,
"learning_rate": 6.351646810896699e-06,
"loss": 0.5777,
"step": 6795
},
{
"epoch": 0.43,
"grad_norm": 0.8620879650115967,
"learning_rate": 6.350658988661089e-06,
"loss": 0.5876,
"step": 6796
},
{
"epoch": 0.43,
"grad_norm": 0.8748513460159302,
"learning_rate": 6.349671109553928e-06,
"loss": 0.5557,
"step": 6797
},
{
"epoch": 0.43,
"grad_norm": 0.9378863573074341,
"learning_rate": 6.348683173616811e-06,
"loss": 0.5726,
"step": 6798
},
{
"epoch": 0.43,
"grad_norm": 0.8460593223571777,
"learning_rate": 6.347695180891337e-06,
"loss": 0.6111,
"step": 6799
},
{
"epoch": 0.43,
"grad_norm": 0.8482157588005066,
"learning_rate": 6.346707131419108e-06,
"loss": 0.5726,
"step": 6800
},
{
"epoch": 0.43,
"grad_norm": 0.935832142829895,
"learning_rate": 6.345719025241725e-06,
"loss": 0.5852,
"step": 6801
},
{
"epoch": 0.43,
"grad_norm": 0.8839829564094543,
"learning_rate": 6.3447308624007964e-06,
"loss": 0.6138,
"step": 6802
},
{
"epoch": 0.43,
"grad_norm": 0.9015828967094421,
"learning_rate": 6.343742642937929e-06,
"loss": 0.5436,
"step": 6803
},
{
"epoch": 0.43,
"grad_norm": 0.925391674041748,
"learning_rate": 6.342754366894735e-06,
"loss": 0.6357,
"step": 6804
},
{
"epoch": 0.43,
"grad_norm": 0.8897901177406311,
"learning_rate": 6.341766034312824e-06,
"loss": 0.6055,
"step": 6805
},
{
"epoch": 0.43,
"grad_norm": 0.9219132661819458,
"learning_rate": 6.340777645233811e-06,
"loss": 0.6218,
"step": 6806
},
{
"epoch": 0.43,
"grad_norm": 0.8790163993835449,
"learning_rate": 6.339789199699319e-06,
"loss": 0.5795,
"step": 6807
},
{
"epoch": 0.43,
"grad_norm": 0.860368549823761,
"learning_rate": 6.338800697750963e-06,
"loss": 0.5757,
"step": 6808
},
{
"epoch": 0.43,
"grad_norm": 0.8733096718788147,
"learning_rate": 6.337812139430368e-06,
"loss": 0.5503,
"step": 6809
},
{
"epoch": 0.43,
"grad_norm": 0.9531643986701965,
"learning_rate": 6.336823524779155e-06,
"loss": 0.6141,
"step": 6810
},
{
"epoch": 0.43,
"grad_norm": 0.8316904902458191,
"learning_rate": 6.335834853838957e-06,
"loss": 0.5442,
"step": 6811
},
{
"epoch": 0.43,
"grad_norm": 0.9344193339347839,
"learning_rate": 6.334846126651399e-06,
"loss": 0.6328,
"step": 6812
},
{
"epoch": 0.43,
"grad_norm": 0.9671064615249634,
"learning_rate": 6.333857343258115e-06,
"loss": 0.6196,
"step": 6813
},
{
"epoch": 0.43,
"grad_norm": 0.9183486104011536,
"learning_rate": 6.3328685037007365e-06,
"loss": 0.5904,
"step": 6814
},
{
"epoch": 0.43,
"grad_norm": 0.9257077574729919,
"learning_rate": 6.331879608020905e-06,
"loss": 0.5997,
"step": 6815
},
{
"epoch": 0.43,
"grad_norm": 0.9391463398933411,
"learning_rate": 6.330890656260253e-06,
"loss": 0.6602,
"step": 6816
},
{
"epoch": 0.43,
"grad_norm": 0.9092316627502441,
"learning_rate": 6.329901648460428e-06,
"loss": 0.6427,
"step": 6817
},
{
"epoch": 0.43,
"grad_norm": 0.9237379431724548,
"learning_rate": 6.32891258466307e-06,
"loss": 0.6052,
"step": 6818
},
{
"epoch": 0.43,
"grad_norm": 0.8574221134185791,
"learning_rate": 6.3279234649098265e-06,
"loss": 0.5748,
"step": 6819
},
{
"epoch": 0.43,
"grad_norm": 0.9020368456840515,
"learning_rate": 6.326934289242346e-06,
"loss": 0.5697,
"step": 6820
},
{
"epoch": 0.43,
"grad_norm": 0.9599592685699463,
"learning_rate": 6.325945057702276e-06,
"loss": 0.6449,
"step": 6821
},
{
"epoch": 0.43,
"grad_norm": 0.8588045239448547,
"learning_rate": 6.324955770331274e-06,
"loss": 0.6282,
"step": 6822
},
{
"epoch": 0.43,
"grad_norm": 0.8139827847480774,
"learning_rate": 6.323966427170993e-06,
"loss": 0.5639,
"step": 6823
},
{
"epoch": 0.43,
"grad_norm": 0.9147988557815552,
"learning_rate": 6.322977028263093e-06,
"loss": 0.6103,
"step": 6824
},
{
"epoch": 0.43,
"grad_norm": 0.9550712704658508,
"learning_rate": 6.321987573649232e-06,
"loss": 0.5802,
"step": 6825
},
{
"epoch": 0.43,
"grad_norm": 0.8623383045196533,
"learning_rate": 6.320998063371072e-06,
"loss": 0.5587,
"step": 6826
},
{
"epoch": 0.43,
"grad_norm": 0.8975523710250854,
"learning_rate": 6.320008497470281e-06,
"loss": 0.6382,
"step": 6827
},
{
"epoch": 0.43,
"grad_norm": 0.9629261493682861,
"learning_rate": 6.319018875988523e-06,
"loss": 0.6272,
"step": 6828
},
{
"epoch": 0.43,
"grad_norm": 0.9650130271911621,
"learning_rate": 6.318029198967468e-06,
"loss": 0.6143,
"step": 6829
},
{
"epoch": 0.43,
"grad_norm": 0.9213373064994812,
"learning_rate": 6.317039466448789e-06,
"loss": 0.6329,
"step": 6830
},
{
"epoch": 0.43,
"grad_norm": 0.8667360544204712,
"learning_rate": 6.316049678474159e-06,
"loss": 0.5997,
"step": 6831
},
{
"epoch": 0.43,
"grad_norm": 0.9180268049240112,
"learning_rate": 6.315059835085257e-06,
"loss": 0.5874,
"step": 6832
},
{
"epoch": 0.43,
"grad_norm": 0.9233614802360535,
"learning_rate": 6.314069936323759e-06,
"loss": 0.5789,
"step": 6833
},
{
"epoch": 0.43,
"grad_norm": 0.8247601985931396,
"learning_rate": 6.313079982231347e-06,
"loss": 0.5741,
"step": 6834
},
{
"epoch": 0.43,
"grad_norm": 0.893379807472229,
"learning_rate": 6.312089972849707e-06,
"loss": 0.615,
"step": 6835
},
{
"epoch": 0.43,
"grad_norm": 0.8536086082458496,
"learning_rate": 6.31109990822052e-06,
"loss": 0.6018,
"step": 6836
},
{
"epoch": 0.43,
"grad_norm": 0.8153089284896851,
"learning_rate": 6.3101097883854765e-06,
"loss": 0.6091,
"step": 6837
},
{
"epoch": 0.43,
"grad_norm": 0.9722812175750732,
"learning_rate": 6.30911961338627e-06,
"loss": 0.5822,
"step": 6838
},
{
"epoch": 0.43,
"grad_norm": 0.8883670568466187,
"learning_rate": 6.3081293832645896e-06,
"loss": 0.5889,
"step": 6839
},
{
"epoch": 0.43,
"grad_norm": 0.9067282676696777,
"learning_rate": 6.30713909806213e-06,
"loss": 0.538,
"step": 6840
},
{
"epoch": 0.43,
"grad_norm": 0.9098742008209229,
"learning_rate": 6.306148757820591e-06,
"loss": 0.5896,
"step": 6841
},
{
"epoch": 0.43,
"grad_norm": 0.8773499131202698,
"learning_rate": 6.3051583625816725e-06,
"loss": 0.5662,
"step": 6842
},
{
"epoch": 0.43,
"grad_norm": 0.81912761926651,
"learning_rate": 6.304167912387076e-06,
"loss": 0.5311,
"step": 6843
},
{
"epoch": 0.43,
"grad_norm": 0.8784845471382141,
"learning_rate": 6.303177407278504e-06,
"loss": 0.6069,
"step": 6844
},
{
"epoch": 0.43,
"grad_norm": 0.885051965713501,
"learning_rate": 6.302186847297666e-06,
"loss": 0.5553,
"step": 6845
},
{
"epoch": 0.43,
"grad_norm": 0.879306972026825,
"learning_rate": 6.301196232486269e-06,
"loss": 0.5763,
"step": 6846
},
{
"epoch": 0.43,
"grad_norm": 0.9128481149673462,
"learning_rate": 6.300205562886026e-06,
"loss": 0.5423,
"step": 6847
},
{
"epoch": 0.43,
"grad_norm": 0.9183526635169983,
"learning_rate": 6.29921483853865e-06,
"loss": 0.6028,
"step": 6848
},
{
"epoch": 0.43,
"grad_norm": 0.8842886090278625,
"learning_rate": 6.298224059485856e-06,
"loss": 0.5602,
"step": 6849
},
{
"epoch": 0.43,
"grad_norm": 0.87552809715271,
"learning_rate": 6.297233225769363e-06,
"loss": 0.6139,
"step": 6850
},
{
"epoch": 0.43,
"grad_norm": 0.8830863237380981,
"learning_rate": 6.296242337430892e-06,
"loss": 0.5313,
"step": 6851
},
{
"epoch": 0.43,
"grad_norm": 0.8993502259254456,
"learning_rate": 6.2952513945121654e-06,
"loss": 0.6239,
"step": 6852
},
{
"epoch": 0.43,
"grad_norm": 0.8739321827888489,
"learning_rate": 6.2942603970549075e-06,
"loss": 0.5823,
"step": 6853
},
{
"epoch": 0.43,
"grad_norm": 0.872380793094635,
"learning_rate": 6.293269345100849e-06,
"loss": 0.5773,
"step": 6854
},
{
"epoch": 0.43,
"grad_norm": 0.8558187484741211,
"learning_rate": 6.292278238691715e-06,
"loss": 0.6118,
"step": 6855
},
{
"epoch": 0.43,
"grad_norm": 0.8276113271713257,
"learning_rate": 6.29128707786924e-06,
"loss": 0.5871,
"step": 6856
},
{
"epoch": 0.43,
"grad_norm": 0.9095969200134277,
"learning_rate": 6.29029586267516e-06,
"loss": 0.5682,
"step": 6857
},
{
"epoch": 0.43,
"grad_norm": 0.9365728497505188,
"learning_rate": 6.289304593151209e-06,
"loss": 0.6114,
"step": 6858
},
{
"epoch": 0.43,
"grad_norm": 0.849093496799469,
"learning_rate": 6.288313269339126e-06,
"loss": 0.587,
"step": 6859
},
{
"epoch": 0.43,
"grad_norm": 0.871545672416687,
"learning_rate": 6.287321891280653e-06,
"loss": 0.5703,
"step": 6860
},
{
"epoch": 0.43,
"grad_norm": 0.8848944306373596,
"learning_rate": 6.2863304590175335e-06,
"loss": 0.5847,
"step": 6861
},
{
"epoch": 0.43,
"grad_norm": 0.8347170948982239,
"learning_rate": 6.2853389725915146e-06,
"loss": 0.5983,
"step": 6862
},
{
"epoch": 0.43,
"grad_norm": 0.8573687672615051,
"learning_rate": 6.284347432044342e-06,
"loss": 0.6016,
"step": 6863
},
{
"epoch": 0.43,
"grad_norm": 0.9318529963493347,
"learning_rate": 6.2833558374177664e-06,
"loss": 0.5946,
"step": 6864
},
{
"epoch": 0.43,
"grad_norm": 0.8856549859046936,
"learning_rate": 6.282364188753541e-06,
"loss": 0.5791,
"step": 6865
},
{
"epoch": 0.43,
"grad_norm": 0.8939905762672424,
"learning_rate": 6.281372486093422e-06,
"loss": 0.6297,
"step": 6866
},
{
"epoch": 0.44,
"grad_norm": 0.9018425941467285,
"learning_rate": 6.280380729479164e-06,
"loss": 0.5627,
"step": 6867
},
{
"epoch": 0.44,
"grad_norm": 0.9689738750457764,
"learning_rate": 6.279388918952527e-06,
"loss": 0.6036,
"step": 6868
},
{
"epoch": 0.44,
"grad_norm": 0.9120928049087524,
"learning_rate": 6.278397054555275e-06,
"loss": 0.6214,
"step": 6869
},
{
"epoch": 0.44,
"grad_norm": 0.9339777827262878,
"learning_rate": 6.277405136329169e-06,
"loss": 0.6575,
"step": 6870
},
{
"epoch": 0.44,
"grad_norm": 0.8833754658699036,
"learning_rate": 6.276413164315978e-06,
"loss": 0.5584,
"step": 6871
},
{
"epoch": 0.44,
"grad_norm": 0.8568885326385498,
"learning_rate": 6.2754211385574674e-06,
"loss": 0.5942,
"step": 6872
},
{
"epoch": 0.44,
"grad_norm": 0.9012584686279297,
"learning_rate": 6.274429059095411e-06,
"loss": 0.6192,
"step": 6873
},
{
"epoch": 0.44,
"grad_norm": 0.8778733611106873,
"learning_rate": 6.273436925971578e-06,
"loss": 0.5976,
"step": 6874
},
{
"epoch": 0.44,
"grad_norm": 0.8796364068984985,
"learning_rate": 6.272444739227748e-06,
"loss": 0.5434,
"step": 6875
},
{
"epoch": 0.44,
"grad_norm": 0.870852530002594,
"learning_rate": 6.2714524989056945e-06,
"loss": 0.6127,
"step": 6876
},
{
"epoch": 0.44,
"grad_norm": 0.9310038685798645,
"learning_rate": 6.270460205047202e-06,
"loss": 0.6447,
"step": 6877
},
{
"epoch": 0.44,
"grad_norm": 0.8664422035217285,
"learning_rate": 6.269467857694047e-06,
"loss": 0.5595,
"step": 6878
},
{
"epoch": 0.44,
"grad_norm": 0.9125611186027527,
"learning_rate": 6.268475456888019e-06,
"loss": 0.6122,
"step": 6879
},
{
"epoch": 0.44,
"grad_norm": 0.8741683959960938,
"learning_rate": 6.2674830026709014e-06,
"loss": 0.6289,
"step": 6880
},
{
"epoch": 0.44,
"grad_norm": 0.8851672410964966,
"learning_rate": 6.266490495084484e-06,
"loss": 0.5838,
"step": 6881
},
{
"epoch": 0.44,
"grad_norm": 0.9018517732620239,
"learning_rate": 6.265497934170559e-06,
"loss": 0.602,
"step": 6882
},
{
"epoch": 0.44,
"grad_norm": 0.8665854930877686,
"learning_rate": 6.264505319970915e-06,
"loss": 0.5744,
"step": 6883
},
{
"epoch": 0.44,
"grad_norm": 0.8087853789329529,
"learning_rate": 6.263512652527353e-06,
"loss": 0.5761,
"step": 6884
},
{
"epoch": 0.44,
"grad_norm": 0.8873346447944641,
"learning_rate": 6.262519931881669e-06,
"loss": 0.626,
"step": 6885
},
{
"epoch": 0.44,
"grad_norm": 0.9511377811431885,
"learning_rate": 6.261527158075662e-06,
"loss": 0.6155,
"step": 6886
},
{
"epoch": 0.44,
"grad_norm": 0.8355633616447449,
"learning_rate": 6.260534331151133e-06,
"loss": 0.5037,
"step": 6887
},
{
"epoch": 0.44,
"grad_norm": 0.8886730670928955,
"learning_rate": 6.259541451149892e-06,
"loss": 0.566,
"step": 6888
},
{
"epoch": 0.44,
"grad_norm": 0.8618388175964355,
"learning_rate": 6.258548518113741e-06,
"loss": 0.603,
"step": 6889
},
{
"epoch": 0.44,
"grad_norm": 0.8546575307846069,
"learning_rate": 6.257555532084489e-06,
"loss": 0.5327,
"step": 6890
},
{
"epoch": 0.44,
"grad_norm": 0.8894109725952148,
"learning_rate": 6.2565624931039485e-06,
"loss": 0.6208,
"step": 6891
},
{
"epoch": 0.44,
"grad_norm": 0.8885670900344849,
"learning_rate": 6.255569401213933e-06,
"loss": 0.6106,
"step": 6892
},
{
"epoch": 0.44,
"grad_norm": 0.8722066283226013,
"learning_rate": 6.254576256456257e-06,
"loss": 0.5616,
"step": 6893
},
{
"epoch": 0.44,
"grad_norm": 0.8073423504829407,
"learning_rate": 6.253583058872741e-06,
"loss": 0.5666,
"step": 6894
},
{
"epoch": 0.44,
"grad_norm": 0.8527199625968933,
"learning_rate": 6.2525898085052005e-06,
"loss": 0.5878,
"step": 6895
},
{
"epoch": 0.44,
"grad_norm": 0.8704131841659546,
"learning_rate": 6.251596505395463e-06,
"loss": 0.587,
"step": 6896
},
{
"epoch": 0.44,
"grad_norm": 0.9160160422325134,
"learning_rate": 6.25060314958535e-06,
"loss": 0.6092,
"step": 6897
},
{
"epoch": 0.44,
"grad_norm": 0.9297466278076172,
"learning_rate": 6.249609741116689e-06,
"loss": 0.573,
"step": 6898
},
{
"epoch": 0.44,
"grad_norm": 0.8892688155174255,
"learning_rate": 6.24861628003131e-06,
"loss": 0.633,
"step": 6899
},
{
"epoch": 0.44,
"grad_norm": 0.8495330214500427,
"learning_rate": 6.247622766371041e-06,
"loss": 0.5785,
"step": 6900
},
{
"epoch": 0.44,
"grad_norm": 0.8945955634117126,
"learning_rate": 6.246629200177718e-06,
"loss": 0.6395,
"step": 6901
},
{
"epoch": 0.44,
"grad_norm": 0.8665342330932617,
"learning_rate": 6.245635581493176e-06,
"loss": 0.5672,
"step": 6902
},
{
"epoch": 0.44,
"grad_norm": 0.9849283695220947,
"learning_rate": 6.244641910359254e-06,
"loss": 0.687,
"step": 6903
},
{
"epoch": 0.44,
"grad_norm": 0.8854192495346069,
"learning_rate": 6.24364818681779e-06,
"loss": 0.5928,
"step": 6904
},
{
"epoch": 0.44,
"grad_norm": 0.9012208580970764,
"learning_rate": 6.242654410910628e-06,
"loss": 0.5718,
"step": 6905
},
{
"epoch": 0.44,
"grad_norm": 0.9132283329963684,
"learning_rate": 6.2416605826796095e-06,
"loss": 0.647,
"step": 6906
},
{
"epoch": 0.44,
"grad_norm": 0.8352293968200684,
"learning_rate": 6.240666702166587e-06,
"loss": 0.6094,
"step": 6907
},
{
"epoch": 0.44,
"grad_norm": 0.8616271615028381,
"learning_rate": 6.239672769413403e-06,
"loss": 0.604,
"step": 6908
},
{
"epoch": 0.44,
"grad_norm": 0.8567937612533569,
"learning_rate": 6.238678784461913e-06,
"loss": 0.5418,
"step": 6909
},
{
"epoch": 0.44,
"grad_norm": 0.8747637867927551,
"learning_rate": 6.237684747353965e-06,
"loss": 0.5794,
"step": 6910
},
{
"epoch": 0.44,
"grad_norm": 0.8634200096130371,
"learning_rate": 6.23669065813142e-06,
"loss": 0.5864,
"step": 6911
},
{
"epoch": 0.44,
"grad_norm": 0.8566752076148987,
"learning_rate": 6.235696516836134e-06,
"loss": 0.601,
"step": 6912
},
{
"epoch": 0.44,
"grad_norm": 0.8930138945579529,
"learning_rate": 6.234702323509967e-06,
"loss": 0.6216,
"step": 6913
},
{
"epoch": 0.44,
"grad_norm": 0.9066216945648193,
"learning_rate": 6.233708078194778e-06,
"loss": 0.6281,
"step": 6914
},
{
"epoch": 0.44,
"grad_norm": 0.9339972138404846,
"learning_rate": 6.232713780932434e-06,
"loss": 0.6423,
"step": 6915
},
{
"epoch": 0.44,
"grad_norm": 0.9028674364089966,
"learning_rate": 6.231719431764804e-06,
"loss": 0.593,
"step": 6916
},
{
"epoch": 0.44,
"grad_norm": 0.8986188769340515,
"learning_rate": 6.230725030733751e-06,
"loss": 0.6325,
"step": 6917
},
{
"epoch": 0.44,
"grad_norm": 0.9198395013809204,
"learning_rate": 6.229730577881148e-06,
"loss": 0.6486,
"step": 6918
},
{
"epoch": 0.44,
"grad_norm": 0.8530875444412231,
"learning_rate": 6.2287360732488685e-06,
"loss": 0.5365,
"step": 6919
},
{
"epoch": 0.44,
"grad_norm": 0.9224251508712769,
"learning_rate": 6.227741516878789e-06,
"loss": 0.6252,
"step": 6920
},
{
"epoch": 0.44,
"grad_norm": 0.9238904118537903,
"learning_rate": 6.226746908812784e-06,
"loss": 0.6171,
"step": 6921
},
{
"epoch": 0.44,
"grad_norm": 0.9478338956832886,
"learning_rate": 6.225752249092734e-06,
"loss": 0.6544,
"step": 6922
},
{
"epoch": 0.44,
"grad_norm": 0.9255354404449463,
"learning_rate": 6.224757537760521e-06,
"loss": 0.6176,
"step": 6923
},
{
"epoch": 0.44,
"grad_norm": 0.8838732838630676,
"learning_rate": 6.2237627748580294e-06,
"loss": 0.633,
"step": 6924
},
{
"epoch": 0.44,
"grad_norm": 0.8878322243690491,
"learning_rate": 6.222767960427144e-06,
"loss": 0.6434,
"step": 6925
},
{
"epoch": 0.44,
"grad_norm": 0.8358001112937927,
"learning_rate": 6.221773094509753e-06,
"loss": 0.6189,
"step": 6926
},
{
"epoch": 0.44,
"grad_norm": 0.8636587858200073,
"learning_rate": 6.220778177147747e-06,
"loss": 0.5856,
"step": 6927
},
{
"epoch": 0.44,
"grad_norm": 0.859220027923584,
"learning_rate": 6.219783208383021e-06,
"loss": 0.568,
"step": 6928
},
{
"epoch": 0.44,
"grad_norm": 0.9622043967247009,
"learning_rate": 6.218788188257465e-06,
"loss": 0.5914,
"step": 6929
},
{
"epoch": 0.44,
"grad_norm": 0.8458547592163086,
"learning_rate": 6.217793116812979e-06,
"loss": 0.554,
"step": 6930
},
{
"epoch": 0.44,
"grad_norm": 0.8938260078430176,
"learning_rate": 6.216797994091462e-06,
"loss": 0.6286,
"step": 6931
},
{
"epoch": 0.44,
"grad_norm": 0.8333603739738464,
"learning_rate": 6.215802820134814e-06,
"loss": 0.5297,
"step": 6932
},
{
"epoch": 0.44,
"grad_norm": 0.8489833474159241,
"learning_rate": 6.214807594984939e-06,
"loss": 0.6139,
"step": 6933
},
{
"epoch": 0.44,
"grad_norm": 0.9169575572013855,
"learning_rate": 6.213812318683741e-06,
"loss": 0.6339,
"step": 6934
},
{
"epoch": 0.44,
"grad_norm": 0.8761480450630188,
"learning_rate": 6.2128169912731295e-06,
"loss": 0.6299,
"step": 6935
},
{
"epoch": 0.44,
"grad_norm": 0.948622465133667,
"learning_rate": 6.211821612795014e-06,
"loss": 0.5962,
"step": 6936
},
{
"epoch": 0.44,
"grad_norm": 0.874839186668396,
"learning_rate": 6.210826183291305e-06,
"loss": 0.5761,
"step": 6937
},
{
"epoch": 0.44,
"grad_norm": 0.8137356638908386,
"learning_rate": 6.209830702803918e-06,
"loss": 0.5615,
"step": 6938
},
{
"epoch": 0.44,
"grad_norm": 0.8627551794052124,
"learning_rate": 6.208835171374769e-06,
"loss": 0.6144,
"step": 6939
},
{
"epoch": 0.44,
"grad_norm": 0.9175261855125427,
"learning_rate": 6.207839589045777e-06,
"loss": 0.578,
"step": 6940
},
{
"epoch": 0.44,
"grad_norm": 0.8478714823722839,
"learning_rate": 6.20684395585886e-06,
"loss": 0.5606,
"step": 6941
},
{
"epoch": 0.44,
"grad_norm": 0.8721091747283936,
"learning_rate": 6.205848271855943e-06,
"loss": 0.5986,
"step": 6942
},
{
"epoch": 0.44,
"grad_norm": 0.9747815132141113,
"learning_rate": 6.204852537078952e-06,
"loss": 0.5936,
"step": 6943
},
{
"epoch": 0.44,
"grad_norm": 0.9325621724128723,
"learning_rate": 6.203856751569809e-06,
"loss": 0.6673,
"step": 6944
},
{
"epoch": 0.44,
"grad_norm": 0.8681188821792603,
"learning_rate": 6.202860915370447e-06,
"loss": 0.6053,
"step": 6945
},
{
"epoch": 0.44,
"grad_norm": 0.8858415484428406,
"learning_rate": 6.201865028522798e-06,
"loss": 0.5277,
"step": 6946
},
{
"epoch": 0.44,
"grad_norm": 0.9327991604804993,
"learning_rate": 6.200869091068791e-06,
"loss": 0.5919,
"step": 6947
},
{
"epoch": 0.44,
"grad_norm": 0.8470887541770935,
"learning_rate": 6.1998731030503655e-06,
"loss": 0.5295,
"step": 6948
},
{
"epoch": 0.44,
"grad_norm": 0.8615082502365112,
"learning_rate": 6.198877064509458e-06,
"loss": 0.5833,
"step": 6949
},
{
"epoch": 0.44,
"grad_norm": 0.9464468359947205,
"learning_rate": 6.1978809754880076e-06,
"loss": 0.6293,
"step": 6950
},
{
"epoch": 0.44,
"grad_norm": 0.8637322783470154,
"learning_rate": 6.196884836027957e-06,
"loss": 0.5632,
"step": 6951
},
{
"epoch": 0.44,
"grad_norm": 0.8578369617462158,
"learning_rate": 6.195888646171247e-06,
"loss": 0.5286,
"step": 6952
},
{
"epoch": 0.44,
"grad_norm": 0.8746157288551331,
"learning_rate": 6.194892405959829e-06,
"loss": 0.597,
"step": 6953
},
{
"epoch": 0.44,
"grad_norm": 0.8710793852806091,
"learning_rate": 6.193896115435648e-06,
"loss": 0.603,
"step": 6954
},
{
"epoch": 0.44,
"grad_norm": 0.8585920929908752,
"learning_rate": 6.192899774640655e-06,
"loss": 0.6162,
"step": 6955
},
{
"epoch": 0.44,
"grad_norm": 0.8971749544143677,
"learning_rate": 6.191903383616801e-06,
"loss": 0.5941,
"step": 6956
},
{
"epoch": 0.44,
"grad_norm": 0.8642129898071289,
"learning_rate": 6.190906942406043e-06,
"loss": 0.5589,
"step": 6957
},
{
"epoch": 0.44,
"grad_norm": 0.9450658559799194,
"learning_rate": 6.189910451050336e-06,
"loss": 0.5831,
"step": 6958
},
{
"epoch": 0.44,
"grad_norm": 0.9805313348770142,
"learning_rate": 6.1889139095916395e-06,
"loss": 0.6607,
"step": 6959
},
{
"epoch": 0.44,
"grad_norm": 0.8744614124298096,
"learning_rate": 6.187917318071914e-06,
"loss": 0.6163,
"step": 6960
},
{
"epoch": 0.44,
"grad_norm": 0.8556416034698486,
"learning_rate": 6.1869206765331234e-06,
"loss": 0.5893,
"step": 6961
},
{
"epoch": 0.44,
"grad_norm": 0.8845242261886597,
"learning_rate": 6.1859239850172325e-06,
"loss": 0.5842,
"step": 6962
},
{
"epoch": 0.44,
"grad_norm": 0.8819428086280823,
"learning_rate": 6.1849272435662065e-06,
"loss": 0.5875,
"step": 6963
},
{
"epoch": 0.44,
"grad_norm": 0.9113361239433289,
"learning_rate": 6.183930452222017e-06,
"loss": 0.5681,
"step": 6964
},
{
"epoch": 0.44,
"grad_norm": 0.8745653629302979,
"learning_rate": 6.1829336110266356e-06,
"loss": 0.5826,
"step": 6965
},
{
"epoch": 0.44,
"grad_norm": 0.8720842599868774,
"learning_rate": 6.181936720022033e-06,
"loss": 0.6105,
"step": 6966
},
{
"epoch": 0.44,
"grad_norm": 0.9576596617698669,
"learning_rate": 6.180939779250188e-06,
"loss": 0.6231,
"step": 6967
},
{
"epoch": 0.44,
"grad_norm": 0.8861308097839355,
"learning_rate": 6.179942788753077e-06,
"loss": 0.6204,
"step": 6968
},
{
"epoch": 0.44,
"grad_norm": 0.9912355542182922,
"learning_rate": 6.178945748572681e-06,
"loss": 0.6036,
"step": 6969
},
{
"epoch": 0.44,
"grad_norm": 0.9015969634056091,
"learning_rate": 6.177948658750979e-06,
"loss": 0.5217,
"step": 6970
},
{
"epoch": 0.44,
"grad_norm": 0.8962372541427612,
"learning_rate": 6.176951519329958e-06,
"loss": 0.5973,
"step": 6971
},
{
"epoch": 0.44,
"grad_norm": 0.9305719137191772,
"learning_rate": 6.1759543303516025e-06,
"loss": 0.5845,
"step": 6972
},
{
"epoch": 0.44,
"grad_norm": 0.9429194331169128,
"learning_rate": 6.174957091857901e-06,
"loss": 0.6336,
"step": 6973
},
{
"epoch": 0.44,
"grad_norm": 0.8551360368728638,
"learning_rate": 6.173959803890843e-06,
"loss": 0.5911,
"step": 6974
},
{
"epoch": 0.44,
"grad_norm": 0.9430440068244934,
"learning_rate": 6.172962466492423e-06,
"loss": 0.612,
"step": 6975
},
{
"epoch": 0.44,
"grad_norm": 0.8664399981498718,
"learning_rate": 6.171965079704634e-06,
"loss": 0.6186,
"step": 6976
},
{
"epoch": 0.44,
"grad_norm": 0.9083961248397827,
"learning_rate": 6.17096764356947e-06,
"loss": 0.5966,
"step": 6977
},
{
"epoch": 0.44,
"grad_norm": 0.8894690275192261,
"learning_rate": 6.169970158128935e-06,
"loss": 0.6315,
"step": 6978
},
{
"epoch": 0.44,
"grad_norm": 0.9232130646705627,
"learning_rate": 6.168972623425023e-06,
"loss": 0.5961,
"step": 6979
},
{
"epoch": 0.44,
"grad_norm": 0.8410488963127136,
"learning_rate": 6.167975039499744e-06,
"loss": 0.5795,
"step": 6980
},
{
"epoch": 0.44,
"grad_norm": 0.8683662414550781,
"learning_rate": 6.1669774063950985e-06,
"loss": 0.6774,
"step": 6981
},
{
"epoch": 0.44,
"grad_norm": 1.0402193069458008,
"learning_rate": 6.165979724153094e-06,
"loss": 0.636,
"step": 6982
},
{
"epoch": 0.44,
"grad_norm": 0.8895815014839172,
"learning_rate": 6.164981992815737e-06,
"loss": 0.5795,
"step": 6983
},
{
"epoch": 0.44,
"grad_norm": 0.8773569464683533,
"learning_rate": 6.163984212425043e-06,
"loss": 0.5905,
"step": 6984
},
{
"epoch": 0.44,
"grad_norm": 0.8979213833808899,
"learning_rate": 6.162986383023023e-06,
"loss": 0.6089,
"step": 6985
},
{
"epoch": 0.44,
"grad_norm": 0.953054666519165,
"learning_rate": 6.161988504651692e-06,
"loss": 0.6058,
"step": 6986
},
{
"epoch": 0.44,
"grad_norm": 0.8674301505088806,
"learning_rate": 6.160990577353066e-06,
"loss": 0.6171,
"step": 6987
},
{
"epoch": 0.44,
"grad_norm": 0.8784220814704895,
"learning_rate": 6.1599926011691695e-06,
"loss": 0.5925,
"step": 6988
},
{
"epoch": 0.44,
"grad_norm": 0.9530996084213257,
"learning_rate": 6.1589945761420166e-06,
"loss": 0.6118,
"step": 6989
},
{
"epoch": 0.44,
"grad_norm": 0.892207145690918,
"learning_rate": 6.157996502313635e-06,
"loss": 0.5923,
"step": 6990
},
{
"epoch": 0.44,
"grad_norm": 0.803701639175415,
"learning_rate": 6.156998379726048e-06,
"loss": 0.549,
"step": 6991
},
{
"epoch": 0.44,
"grad_norm": 0.9421709775924683,
"learning_rate": 6.1560002084212845e-06,
"loss": 0.5709,
"step": 6992
},
{
"epoch": 0.44,
"grad_norm": 0.8066360354423523,
"learning_rate": 6.155001988441375e-06,
"loss": 0.5746,
"step": 6993
},
{
"epoch": 0.44,
"grad_norm": 0.8635882139205933,
"learning_rate": 6.154003719828349e-06,
"loss": 0.5782,
"step": 6994
},
{
"epoch": 0.44,
"grad_norm": 0.8454831838607788,
"learning_rate": 6.1530054026242405e-06,
"loss": 0.5256,
"step": 6995
},
{
"epoch": 0.44,
"grad_norm": 0.890565037727356,
"learning_rate": 6.152007036871085e-06,
"loss": 0.5331,
"step": 6996
},
{
"epoch": 0.44,
"grad_norm": 0.8612550497055054,
"learning_rate": 6.151008622610921e-06,
"loss": 0.6093,
"step": 6997
},
{
"epoch": 0.44,
"grad_norm": 0.8857212066650391,
"learning_rate": 6.150010159885789e-06,
"loss": 0.5336,
"step": 6998
},
{
"epoch": 0.44,
"grad_norm": 0.9954962730407715,
"learning_rate": 6.149011648737728e-06,
"loss": 0.6686,
"step": 6999
},
{
"epoch": 0.44,
"grad_norm": 0.9537789225578308,
"learning_rate": 6.148013089208784e-06,
"loss": 0.6394,
"step": 7000
},
{
"epoch": 0.44,
"grad_norm": 0.872518002986908,
"learning_rate": 6.1470144813410045e-06,
"loss": 0.6226,
"step": 7001
},
{
"epoch": 0.44,
"grad_norm": 0.8990280032157898,
"learning_rate": 6.146015825176432e-06,
"loss": 0.5554,
"step": 7002
},
{
"epoch": 0.44,
"grad_norm": 0.8978776931762695,
"learning_rate": 6.145017120757123e-06,
"loss": 0.6101,
"step": 7003
},
{
"epoch": 0.44,
"grad_norm": 0.9086971879005432,
"learning_rate": 6.144018368125124e-06,
"loss": 0.5648,
"step": 7004
},
{
"epoch": 0.44,
"grad_norm": 0.8578811287879944,
"learning_rate": 6.143019567322493e-06,
"loss": 0.5522,
"step": 7005
},
{
"epoch": 0.44,
"grad_norm": 0.9005651473999023,
"learning_rate": 6.1420207183912824e-06,
"loss": 0.6659,
"step": 7006
},
{
"epoch": 0.44,
"grad_norm": 0.8784024715423584,
"learning_rate": 6.141021821373555e-06,
"loss": 0.5978,
"step": 7007
},
{
"epoch": 0.44,
"grad_norm": 0.9198904633522034,
"learning_rate": 6.140022876311367e-06,
"loss": 0.5903,
"step": 7008
},
{
"epoch": 0.44,
"grad_norm": 0.8121350407600403,
"learning_rate": 6.139023883246781e-06,
"loss": 0.5551,
"step": 7009
},
{
"epoch": 0.44,
"grad_norm": 0.8870401382446289,
"learning_rate": 6.1380248422218604e-06,
"loss": 0.521,
"step": 7010
},
{
"epoch": 0.44,
"grad_norm": 0.9249464869499207,
"learning_rate": 6.137025753278673e-06,
"loss": 0.6247,
"step": 7011
},
{
"epoch": 0.44,
"grad_norm": 0.8742251992225647,
"learning_rate": 6.1360266164592886e-06,
"loss": 0.6279,
"step": 7012
},
{
"epoch": 0.44,
"grad_norm": 0.9462286829948425,
"learning_rate": 6.135027431805774e-06,
"loss": 0.6258,
"step": 7013
},
{
"epoch": 0.44,
"grad_norm": 0.8585238456726074,
"learning_rate": 6.134028199360203e-06,
"loss": 0.5567,
"step": 7014
},
{
"epoch": 0.44,
"grad_norm": 0.8556974530220032,
"learning_rate": 6.133028919164647e-06,
"loss": 0.6523,
"step": 7015
},
{
"epoch": 0.44,
"grad_norm": 0.8407923579216003,
"learning_rate": 6.132029591261188e-06,
"loss": 0.5777,
"step": 7016
},
{
"epoch": 0.44,
"grad_norm": 0.9544344544410706,
"learning_rate": 6.1310302156919e-06,
"loss": 0.6063,
"step": 7017
},
{
"epoch": 0.44,
"grad_norm": 0.9588916301727295,
"learning_rate": 6.130030792498865e-06,
"loss": 0.6007,
"step": 7018
},
{
"epoch": 0.44,
"grad_norm": 0.8659964203834534,
"learning_rate": 6.129031321724163e-06,
"loss": 0.5848,
"step": 7019
},
{
"epoch": 0.44,
"grad_norm": 0.9121047854423523,
"learning_rate": 6.128031803409881e-06,
"loss": 0.5774,
"step": 7020
},
{
"epoch": 0.44,
"grad_norm": 0.9202248454093933,
"learning_rate": 6.127032237598102e-06,
"loss": 0.6225,
"step": 7021
},
{
"epoch": 0.44,
"grad_norm": 0.8752833604812622,
"learning_rate": 6.126032624330917e-06,
"loss": 0.5862,
"step": 7022
},
{
"epoch": 0.44,
"grad_norm": 0.8969300389289856,
"learning_rate": 6.125032963650417e-06,
"loss": 0.6297,
"step": 7023
},
{
"epoch": 0.45,
"grad_norm": 0.880032479763031,
"learning_rate": 6.124033255598691e-06,
"loss": 0.5669,
"step": 7024
},
{
"epoch": 0.45,
"grad_norm": 0.8935046195983887,
"learning_rate": 6.1230335002178345e-06,
"loss": 0.5929,
"step": 7025
},
{
"epoch": 0.45,
"grad_norm": 0.9103530645370483,
"learning_rate": 6.1220336975499435e-06,
"loss": 0.5772,
"step": 7026
},
{
"epoch": 0.45,
"grad_norm": 0.8994046449661255,
"learning_rate": 6.121033847637119e-06,
"loss": 0.63,
"step": 7027
},
{
"epoch": 0.45,
"grad_norm": 0.8760805726051331,
"learning_rate": 6.120033950521458e-06,
"loss": 0.568,
"step": 7028
},
{
"epoch": 0.45,
"grad_norm": 0.8472411632537842,
"learning_rate": 6.119034006245063e-06,
"loss": 0.5667,
"step": 7029
},
{
"epoch": 0.45,
"grad_norm": 0.9165576100349426,
"learning_rate": 6.118034014850039e-06,
"loss": 0.6196,
"step": 7030
},
{
"epoch": 0.45,
"grad_norm": 0.8201605677604675,
"learning_rate": 6.117033976378493e-06,
"loss": 0.5162,
"step": 7031
},
{
"epoch": 0.45,
"grad_norm": 0.9203435182571411,
"learning_rate": 6.116033890872531e-06,
"loss": 0.558,
"step": 7032
},
{
"epoch": 0.45,
"grad_norm": 0.8711187839508057,
"learning_rate": 6.115033758374265e-06,
"loss": 0.634,
"step": 7033
},
{
"epoch": 0.45,
"grad_norm": 0.8825559616088867,
"learning_rate": 6.114033578925805e-06,
"loss": 0.599,
"step": 7034
},
{
"epoch": 0.45,
"grad_norm": 0.8482293486595154,
"learning_rate": 6.1130333525692684e-06,
"loss": 0.583,
"step": 7035
},
{
"epoch": 0.45,
"grad_norm": 0.9043130278587341,
"learning_rate": 6.112033079346767e-06,
"loss": 0.575,
"step": 7036
},
{
"epoch": 0.45,
"grad_norm": 0.8233504295349121,
"learning_rate": 6.111032759300423e-06,
"loss": 0.6182,
"step": 7037
},
{
"epoch": 0.45,
"grad_norm": 0.8660386204719543,
"learning_rate": 6.110032392472354e-06,
"loss": 0.5851,
"step": 7038
},
{
"epoch": 0.45,
"grad_norm": 0.8297396302223206,
"learning_rate": 6.109031978904683e-06,
"loss": 0.5715,
"step": 7039
},
{
"epoch": 0.45,
"grad_norm": 0.8824520707130432,
"learning_rate": 6.108031518639532e-06,
"loss": 0.6218,
"step": 7040
},
{
"epoch": 0.45,
"grad_norm": 0.941839873790741,
"learning_rate": 6.107031011719029e-06,
"loss": 0.6708,
"step": 7041
},
{
"epoch": 0.45,
"grad_norm": 0.8961352705955505,
"learning_rate": 6.106030458185303e-06,
"loss": 0.5851,
"step": 7042
},
{
"epoch": 0.45,
"grad_norm": 0.9293150305747986,
"learning_rate": 6.105029858080479e-06,
"loss": 0.5899,
"step": 7043
},
{
"epoch": 0.45,
"grad_norm": 0.84063720703125,
"learning_rate": 6.1040292114466935e-06,
"loss": 0.5756,
"step": 7044
},
{
"epoch": 0.45,
"grad_norm": 0.8923290371894836,
"learning_rate": 6.103028518326077e-06,
"loss": 0.5649,
"step": 7045
},
{
"epoch": 0.45,
"grad_norm": 0.9170678853988647,
"learning_rate": 6.102027778760769e-06,
"loss": 0.6111,
"step": 7046
},
{
"epoch": 0.45,
"grad_norm": 0.9100625514984131,
"learning_rate": 6.101026992792904e-06,
"loss": 0.6542,
"step": 7047
},
{
"epoch": 0.45,
"grad_norm": 0.842936635017395,
"learning_rate": 6.100026160464621e-06,
"loss": 0.5677,
"step": 7048
},
{
"epoch": 0.45,
"grad_norm": 0.8089660406112671,
"learning_rate": 6.099025281818065e-06,
"loss": 0.5623,
"step": 7049
},
{
"epoch": 0.45,
"grad_norm": 0.8938851952552795,
"learning_rate": 6.098024356895378e-06,
"loss": 0.5826,
"step": 7050
},
{
"epoch": 0.45,
"grad_norm": 0.8716176152229309,
"learning_rate": 6.097023385738704e-06,
"loss": 0.5478,
"step": 7051
},
{
"epoch": 0.45,
"grad_norm": 0.9070749878883362,
"learning_rate": 6.096022368390191e-06,
"loss": 0.6217,
"step": 7052
},
{
"epoch": 0.45,
"grad_norm": 0.8996195197105408,
"learning_rate": 6.0950213048919895e-06,
"loss": 0.5936,
"step": 7053
},
{
"epoch": 0.45,
"grad_norm": 0.8879945278167725,
"learning_rate": 6.094020195286251e-06,
"loss": 0.57,
"step": 7054
},
{
"epoch": 0.45,
"grad_norm": 0.9025259017944336,
"learning_rate": 6.093019039615128e-06,
"loss": 0.6607,
"step": 7055
},
{
"epoch": 0.45,
"grad_norm": 0.8679327368736267,
"learning_rate": 6.092017837920773e-06,
"loss": 0.5673,
"step": 7056
},
{
"epoch": 0.45,
"grad_norm": 0.8866552114486694,
"learning_rate": 6.091016590245347e-06,
"loss": 0.5824,
"step": 7057
},
{
"epoch": 0.45,
"grad_norm": 0.885572075843811,
"learning_rate": 6.090015296631009e-06,
"loss": 0.5513,
"step": 7058
},
{
"epoch": 0.45,
"grad_norm": 0.9012414813041687,
"learning_rate": 6.089013957119918e-06,
"loss": 0.6531,
"step": 7059
},
{
"epoch": 0.45,
"grad_norm": 0.833052396774292,
"learning_rate": 6.088012571754236e-06,
"loss": 0.5201,
"step": 7060
},
{
"epoch": 0.45,
"grad_norm": 0.952321469783783,
"learning_rate": 6.087011140576132e-06,
"loss": 0.6119,
"step": 7061
},
{
"epoch": 0.45,
"grad_norm": 0.8854734301567078,
"learning_rate": 6.086009663627769e-06,
"loss": 0.5879,
"step": 7062
},
{
"epoch": 0.45,
"grad_norm": 0.899401068687439,
"learning_rate": 6.085008140951318e-06,
"loss": 0.5806,
"step": 7063
},
{
"epoch": 0.45,
"grad_norm": 0.945583164691925,
"learning_rate": 6.084006572588947e-06,
"loss": 0.6169,
"step": 7064
},
{
"epoch": 0.45,
"grad_norm": 0.8657901287078857,
"learning_rate": 6.083004958582832e-06,
"loss": 0.5392,
"step": 7065
},
{
"epoch": 0.45,
"grad_norm": 0.8929893374443054,
"learning_rate": 6.082003298975144e-06,
"loss": 0.6197,
"step": 7066
},
{
"epoch": 0.45,
"grad_norm": 0.8724581599235535,
"learning_rate": 6.081001593808063e-06,
"loss": 0.5692,
"step": 7067
},
{
"epoch": 0.45,
"grad_norm": 0.9062278866767883,
"learning_rate": 6.079999843123763e-06,
"loss": 0.6532,
"step": 7068
},
{
"epoch": 0.45,
"grad_norm": 0.8632785081863403,
"learning_rate": 6.07899804696443e-06,
"loss": 0.6221,
"step": 7069
},
{
"epoch": 0.45,
"grad_norm": 0.9054343104362488,
"learning_rate": 6.077996205372241e-06,
"loss": 0.5598,
"step": 7070
},
{
"epoch": 0.45,
"grad_norm": 0.8777742385864258,
"learning_rate": 6.07699431838938e-06,
"loss": 0.5767,
"step": 7071
},
{
"epoch": 0.45,
"grad_norm": 0.8978030681610107,
"learning_rate": 6.075992386058037e-06,
"loss": 0.5976,
"step": 7072
},
{
"epoch": 0.45,
"grad_norm": 0.9662826061248779,
"learning_rate": 6.074990408420397e-06,
"loss": 0.5744,
"step": 7073
},
{
"epoch": 0.45,
"grad_norm": 0.8414073586463928,
"learning_rate": 6.073988385518652e-06,
"loss": 0.543,
"step": 7074
},
{
"epoch": 0.45,
"grad_norm": 0.863689124584198,
"learning_rate": 6.07298631739499e-06,
"loss": 0.6149,
"step": 7075
},
{
"epoch": 0.45,
"grad_norm": 0.8683016896247864,
"learning_rate": 6.071984204091608e-06,
"loss": 0.6363,
"step": 7076
},
{
"epoch": 0.45,
"grad_norm": 0.9165253639221191,
"learning_rate": 6.0709820456507e-06,
"loss": 0.6226,
"step": 7077
},
{
"epoch": 0.45,
"grad_norm": 0.8738973140716553,
"learning_rate": 6.069979842114465e-06,
"loss": 0.5733,
"step": 7078
},
{
"epoch": 0.45,
"grad_norm": 0.954142153263092,
"learning_rate": 6.068977593525098e-06,
"loss": 0.625,
"step": 7079
},
{
"epoch": 0.45,
"grad_norm": 0.8756037354469299,
"learning_rate": 6.067975299924806e-06,
"loss": 0.6264,
"step": 7080
},
{
"epoch": 0.45,
"grad_norm": 0.8994114398956299,
"learning_rate": 6.066972961355788e-06,
"loss": 0.6367,
"step": 7081
},
{
"epoch": 0.45,
"grad_norm": 0.8711373805999756,
"learning_rate": 6.065970577860252e-06,
"loss": 0.5993,
"step": 7082
},
{
"epoch": 0.45,
"grad_norm": 0.9261775016784668,
"learning_rate": 6.0649681494804014e-06,
"loss": 0.619,
"step": 7083
},
{
"epoch": 0.45,
"grad_norm": 0.8910576701164246,
"learning_rate": 6.063965676258448e-06,
"loss": 0.594,
"step": 7084
},
{
"epoch": 0.45,
"grad_norm": 0.8395871520042419,
"learning_rate": 6.0629631582366015e-06,
"loss": 0.6077,
"step": 7085
},
{
"epoch": 0.45,
"grad_norm": 0.8894856572151184,
"learning_rate": 6.0619605954570726e-06,
"loss": 0.5605,
"step": 7086
},
{
"epoch": 0.45,
"grad_norm": 0.8762527704238892,
"learning_rate": 6.060957987962077e-06,
"loss": 0.5896,
"step": 7087
},
{
"epoch": 0.45,
"grad_norm": 0.8831724524497986,
"learning_rate": 6.059955335793832e-06,
"loss": 0.6159,
"step": 7088
},
{
"epoch": 0.45,
"grad_norm": 0.9416611790657043,
"learning_rate": 6.0589526389945576e-06,
"loss": 0.5841,
"step": 7089
},
{
"epoch": 0.45,
"grad_norm": 0.8676833510398865,
"learning_rate": 6.057949897606469e-06,
"loss": 0.5974,
"step": 7090
},
{
"epoch": 0.45,
"grad_norm": 0.8935969471931458,
"learning_rate": 6.05694711167179e-06,
"loss": 0.5916,
"step": 7091
},
{
"epoch": 0.45,
"grad_norm": 0.8953608870506287,
"learning_rate": 6.055944281232746e-06,
"loss": 0.6164,
"step": 7092
},
{
"epoch": 0.45,
"grad_norm": 0.9108656048774719,
"learning_rate": 6.0549414063315625e-06,
"loss": 0.6615,
"step": 7093
},
{
"epoch": 0.45,
"grad_norm": 0.8927263617515564,
"learning_rate": 6.053938487010464e-06,
"loss": 0.5843,
"step": 7094
},
{
"epoch": 0.45,
"grad_norm": 0.8488055467605591,
"learning_rate": 6.052935523311684e-06,
"loss": 0.5774,
"step": 7095
},
{
"epoch": 0.45,
"grad_norm": 0.861768901348114,
"learning_rate": 6.0519325152774515e-06,
"loss": 0.5815,
"step": 7096
},
{
"epoch": 0.45,
"grad_norm": 0.8488924503326416,
"learning_rate": 6.05092946295e-06,
"loss": 0.6129,
"step": 7097
},
{
"epoch": 0.45,
"grad_norm": 0.8515621423721313,
"learning_rate": 6.049926366371565e-06,
"loss": 0.6088,
"step": 7098
},
{
"epoch": 0.45,
"grad_norm": 0.8117412328720093,
"learning_rate": 6.048923225584383e-06,
"loss": 0.5814,
"step": 7099
},
{
"epoch": 0.45,
"grad_norm": 0.9428633451461792,
"learning_rate": 6.047920040630692e-06,
"loss": 0.5821,
"step": 7100
},
{
"epoch": 0.45,
"grad_norm": 0.8841315507888794,
"learning_rate": 6.046916811552735e-06,
"loss": 0.5685,
"step": 7101
},
{
"epoch": 0.45,
"grad_norm": 0.8266769051551819,
"learning_rate": 6.045913538392754e-06,
"loss": 0.5903,
"step": 7102
},
{
"epoch": 0.45,
"grad_norm": 0.9347844123840332,
"learning_rate": 6.04491022119299e-06,
"loss": 0.6212,
"step": 7103
},
{
"epoch": 0.45,
"grad_norm": 0.8492763042449951,
"learning_rate": 6.043906859995693e-06,
"loss": 0.5411,
"step": 7104
},
{
"epoch": 0.45,
"grad_norm": 0.8961597084999084,
"learning_rate": 6.042903454843109e-06,
"loss": 0.5772,
"step": 7105
},
{
"epoch": 0.45,
"grad_norm": 0.9451767802238464,
"learning_rate": 6.041900005777488e-06,
"loss": 0.6845,
"step": 7106
},
{
"epoch": 0.45,
"grad_norm": 0.8957589268684387,
"learning_rate": 6.040896512841083e-06,
"loss": 0.607,
"step": 7107
},
{
"epoch": 0.45,
"grad_norm": 0.9263405203819275,
"learning_rate": 6.039892976076147e-06,
"loss": 0.5677,
"step": 7108
},
{
"epoch": 0.45,
"grad_norm": 0.842929482460022,
"learning_rate": 6.038889395524935e-06,
"loss": 0.6016,
"step": 7109
},
{
"epoch": 0.45,
"grad_norm": 0.9199305772781372,
"learning_rate": 6.037885771229703e-06,
"loss": 0.6241,
"step": 7110
},
{
"epoch": 0.45,
"grad_norm": 0.8669660091400146,
"learning_rate": 6.036882103232714e-06,
"loss": 0.6024,
"step": 7111
},
{
"epoch": 0.45,
"grad_norm": 0.8775947690010071,
"learning_rate": 6.0358783915762265e-06,
"loss": 0.5895,
"step": 7112
},
{
"epoch": 0.45,
"grad_norm": 0.8761069178581238,
"learning_rate": 6.034874636302502e-06,
"loss": 0.5858,
"step": 7113
},
{
"epoch": 0.45,
"grad_norm": 0.9764485955238342,
"learning_rate": 6.033870837453808e-06,
"loss": 0.6528,
"step": 7114
},
{
"epoch": 0.45,
"grad_norm": 0.8723066449165344,
"learning_rate": 6.0328669950724096e-06,
"loss": 0.5737,
"step": 7115
},
{
"epoch": 0.45,
"grad_norm": 0.8759384751319885,
"learning_rate": 6.031863109200575e-06,
"loss": 0.6642,
"step": 7116
},
{
"epoch": 0.45,
"grad_norm": 0.8828076124191284,
"learning_rate": 6.030859179880574e-06,
"loss": 0.6082,
"step": 7117
},
{
"epoch": 0.45,
"grad_norm": 0.9559153318405151,
"learning_rate": 6.029855207154679e-06,
"loss": 0.599,
"step": 7118
},
{
"epoch": 0.45,
"grad_norm": 0.8825298547744751,
"learning_rate": 6.0288511910651644e-06,
"loss": 0.5871,
"step": 7119
},
{
"epoch": 0.45,
"grad_norm": 0.8728659152984619,
"learning_rate": 6.027847131654305e-06,
"loss": 0.5783,
"step": 7120
},
{
"epoch": 0.45,
"grad_norm": 0.9349566102027893,
"learning_rate": 6.026843028964378e-06,
"loss": 0.6797,
"step": 7121
},
{
"epoch": 0.45,
"grad_norm": 0.9346398711204529,
"learning_rate": 6.025838883037664e-06,
"loss": 0.5802,
"step": 7122
},
{
"epoch": 0.45,
"grad_norm": 1.0061968564987183,
"learning_rate": 6.024834693916443e-06,
"loss": 0.6027,
"step": 7123
},
{
"epoch": 0.45,
"grad_norm": 0.8729983568191528,
"learning_rate": 6.023830461642998e-06,
"loss": 0.5871,
"step": 7124
},
{
"epoch": 0.45,
"grad_norm": 0.8659200072288513,
"learning_rate": 6.022826186259614e-06,
"loss": 0.575,
"step": 7125
},
{
"epoch": 0.45,
"grad_norm": 0.8419411182403564,
"learning_rate": 6.021821867808576e-06,
"loss": 0.5713,
"step": 7126
},
{
"epoch": 0.45,
"grad_norm": 0.9160114526748657,
"learning_rate": 6.0208175063321765e-06,
"loss": 0.5911,
"step": 7127
},
{
"epoch": 0.45,
"grad_norm": 0.869117259979248,
"learning_rate": 6.019813101872701e-06,
"loss": 0.5719,
"step": 7128
},
{
"epoch": 0.45,
"grad_norm": 0.9373559355735779,
"learning_rate": 6.018808654472445e-06,
"loss": 0.5958,
"step": 7129
},
{
"epoch": 0.45,
"grad_norm": 0.9139472246170044,
"learning_rate": 6.017804164173698e-06,
"loss": 0.6223,
"step": 7130
},
{
"epoch": 0.45,
"grad_norm": 0.9792049527168274,
"learning_rate": 6.0167996310187615e-06,
"loss": 0.6056,
"step": 7131
},
{
"epoch": 0.45,
"grad_norm": 0.8391651511192322,
"learning_rate": 6.015795055049929e-06,
"loss": 0.5411,
"step": 7132
},
{
"epoch": 0.45,
"grad_norm": 0.9273377060890198,
"learning_rate": 6.014790436309499e-06,
"loss": 0.6351,
"step": 7133
},
{
"epoch": 0.45,
"grad_norm": 0.8438581228256226,
"learning_rate": 6.013785774839776e-06,
"loss": 0.5491,
"step": 7134
},
{
"epoch": 0.45,
"grad_norm": 0.858647882938385,
"learning_rate": 6.012781070683058e-06,
"loss": 0.6346,
"step": 7135
},
{
"epoch": 0.45,
"grad_norm": 0.8910332918167114,
"learning_rate": 6.011776323881654e-06,
"loss": 0.6019,
"step": 7136
},
{
"epoch": 0.45,
"grad_norm": 0.8092496991157532,
"learning_rate": 6.0107715344778684e-06,
"loss": 0.5526,
"step": 7137
},
{
"epoch": 0.45,
"grad_norm": 0.8764297962188721,
"learning_rate": 6.00976670251401e-06,
"loss": 0.6015,
"step": 7138
},
{
"epoch": 0.45,
"grad_norm": 0.8800103664398193,
"learning_rate": 6.008761828032389e-06,
"loss": 0.5809,
"step": 7139
},
{
"epoch": 0.45,
"grad_norm": 0.9022515416145325,
"learning_rate": 6.007756911075315e-06,
"loss": 0.5433,
"step": 7140
},
{
"epoch": 0.45,
"grad_norm": 0.8949940800666809,
"learning_rate": 6.006751951685104e-06,
"loss": 0.5678,
"step": 7141
},
{
"epoch": 0.45,
"grad_norm": 0.9681234955787659,
"learning_rate": 6.005746949904072e-06,
"loss": 0.6141,
"step": 7142
},
{
"epoch": 0.45,
"grad_norm": 0.863433301448822,
"learning_rate": 6.004741905774533e-06,
"loss": 0.6122,
"step": 7143
},
{
"epoch": 0.45,
"grad_norm": 0.9015092253684998,
"learning_rate": 6.003736819338808e-06,
"loss": 0.6173,
"step": 7144
},
{
"epoch": 0.45,
"grad_norm": 0.9383165836334229,
"learning_rate": 6.0027316906392165e-06,
"loss": 0.5596,
"step": 7145
},
{
"epoch": 0.45,
"grad_norm": 0.8968831896781921,
"learning_rate": 6.001726519718083e-06,
"loss": 0.5773,
"step": 7146
},
{
"epoch": 0.45,
"grad_norm": 0.9414469003677368,
"learning_rate": 6.000721306617731e-06,
"loss": 0.613,
"step": 7147
},
{
"epoch": 0.45,
"grad_norm": 0.8644320368766785,
"learning_rate": 5.999716051380484e-06,
"loss": 0.5843,
"step": 7148
},
{
"epoch": 0.45,
"grad_norm": 0.8745971322059631,
"learning_rate": 5.998710754048674e-06,
"loss": 0.6112,
"step": 7149
},
{
"epoch": 0.45,
"grad_norm": 0.9257605075836182,
"learning_rate": 5.997705414664627e-06,
"loss": 0.5828,
"step": 7150
},
{
"epoch": 0.45,
"grad_norm": 0.8571212887763977,
"learning_rate": 5.996700033270676e-06,
"loss": 0.5362,
"step": 7151
},
{
"epoch": 0.45,
"grad_norm": 0.895595908164978,
"learning_rate": 5.995694609909153e-06,
"loss": 0.5773,
"step": 7152
},
{
"epoch": 0.45,
"grad_norm": 0.8279756307601929,
"learning_rate": 5.9946891446223955e-06,
"loss": 0.5738,
"step": 7153
},
{
"epoch": 0.45,
"grad_norm": 0.8749321699142456,
"learning_rate": 5.993683637452736e-06,
"loss": 0.5683,
"step": 7154
},
{
"epoch": 0.45,
"grad_norm": 0.8859013319015503,
"learning_rate": 5.992678088442518e-06,
"loss": 0.6348,
"step": 7155
},
{
"epoch": 0.45,
"grad_norm": 0.8397629857063293,
"learning_rate": 5.991672497634076e-06,
"loss": 0.5788,
"step": 7156
},
{
"epoch": 0.45,
"grad_norm": 0.8651146292686462,
"learning_rate": 5.990666865069759e-06,
"loss": 0.5909,
"step": 7157
},
{
"epoch": 0.45,
"grad_norm": 0.8625426888465881,
"learning_rate": 5.9896611907919034e-06,
"loss": 0.6283,
"step": 7158
},
{
"epoch": 0.45,
"grad_norm": 0.8170305490493774,
"learning_rate": 5.98865547484286e-06,
"loss": 0.5283,
"step": 7159
},
{
"epoch": 0.45,
"grad_norm": 0.8646724820137024,
"learning_rate": 5.9876497172649704e-06,
"loss": 0.5889,
"step": 7160
},
{
"epoch": 0.45,
"grad_norm": 0.9071366786956787,
"learning_rate": 5.986643918100591e-06,
"loss": 0.6299,
"step": 7161
},
{
"epoch": 0.45,
"grad_norm": 0.8904988169670105,
"learning_rate": 5.985638077392066e-06,
"loss": 0.5889,
"step": 7162
},
{
"epoch": 0.45,
"grad_norm": 0.8769485950469971,
"learning_rate": 5.984632195181752e-06,
"loss": 0.5965,
"step": 7163
},
{
"epoch": 0.45,
"grad_norm": 0.8972844481468201,
"learning_rate": 5.983626271512e-06,
"loss": 0.6024,
"step": 7164
},
{
"epoch": 0.45,
"grad_norm": 0.92257159948349,
"learning_rate": 5.982620306425167e-06,
"loss": 0.6158,
"step": 7165
},
{
"epoch": 0.45,
"grad_norm": 0.8915507793426514,
"learning_rate": 5.981614299963614e-06,
"loss": 0.6001,
"step": 7166
},
{
"epoch": 0.45,
"grad_norm": 0.9429782032966614,
"learning_rate": 5.9806082521696936e-06,
"loss": 0.6177,
"step": 7167
},
{
"epoch": 0.45,
"grad_norm": 0.8852347135543823,
"learning_rate": 5.979602163085775e-06,
"loss": 0.5969,
"step": 7168
},
{
"epoch": 0.45,
"grad_norm": 0.880511999130249,
"learning_rate": 5.978596032754215e-06,
"loss": 0.5388,
"step": 7169
},
{
"epoch": 0.45,
"grad_norm": 0.8679192066192627,
"learning_rate": 5.977589861217381e-06,
"loss": 0.5925,
"step": 7170
},
{
"epoch": 0.45,
"grad_norm": 0.8661013245582581,
"learning_rate": 5.9765836485176376e-06,
"loss": 0.5717,
"step": 7171
},
{
"epoch": 0.45,
"grad_norm": 0.8547639846801758,
"learning_rate": 5.9755773946973546e-06,
"loss": 0.6167,
"step": 7172
},
{
"epoch": 0.45,
"grad_norm": 0.8579798936843872,
"learning_rate": 5.974571099798902e-06,
"loss": 0.584,
"step": 7173
},
{
"epoch": 0.45,
"grad_norm": 0.8616194128990173,
"learning_rate": 5.973564763864651e-06,
"loss": 0.6371,
"step": 7174
},
{
"epoch": 0.45,
"grad_norm": 0.8729208111763,
"learning_rate": 5.972558386936973e-06,
"loss": 0.6199,
"step": 7175
},
{
"epoch": 0.45,
"grad_norm": 0.8993321657180786,
"learning_rate": 5.971551969058246e-06,
"loss": 0.615,
"step": 7176
},
{
"epoch": 0.45,
"grad_norm": 0.8909555673599243,
"learning_rate": 5.970545510270845e-06,
"loss": 0.5902,
"step": 7177
},
{
"epoch": 0.45,
"grad_norm": 0.8814198970794678,
"learning_rate": 5.969539010617149e-06,
"loss": 0.5594,
"step": 7178
},
{
"epoch": 0.45,
"grad_norm": 0.8692460060119629,
"learning_rate": 5.968532470139537e-06,
"loss": 0.5863,
"step": 7179
},
{
"epoch": 0.45,
"grad_norm": 0.8972712755203247,
"learning_rate": 5.967525888880392e-06,
"loss": 0.5691,
"step": 7180
},
{
"epoch": 0.45,
"grad_norm": 0.8847329020500183,
"learning_rate": 5.966519266882099e-06,
"loss": 0.58,
"step": 7181
},
{
"epoch": 0.46,
"grad_norm": 0.9059928059577942,
"learning_rate": 5.965512604187041e-06,
"loss": 0.5482,
"step": 7182
},
{
"epoch": 0.46,
"grad_norm": 0.9167791604995728,
"learning_rate": 5.964505900837606e-06,
"loss": 0.5814,
"step": 7183
},
{
"epoch": 0.46,
"grad_norm": 0.9062039852142334,
"learning_rate": 5.963499156876182e-06,
"loss": 0.5696,
"step": 7184
},
{
"epoch": 0.46,
"grad_norm": 0.9285433888435364,
"learning_rate": 5.962492372345163e-06,
"loss": 0.5891,
"step": 7185
},
{
"epoch": 0.46,
"grad_norm": 0.8977758288383484,
"learning_rate": 5.961485547286936e-06,
"loss": 0.634,
"step": 7186
},
{
"epoch": 0.46,
"grad_norm": 0.8944379091262817,
"learning_rate": 5.960478681743897e-06,
"loss": 0.5478,
"step": 7187
},
{
"epoch": 0.46,
"grad_norm": 0.8805668950080872,
"learning_rate": 5.959471775758444e-06,
"loss": 0.6422,
"step": 7188
},
{
"epoch": 0.46,
"grad_norm": 0.8319005370140076,
"learning_rate": 5.9584648293729715e-06,
"loss": 0.5771,
"step": 7189
},
{
"epoch": 0.46,
"grad_norm": 0.834413468837738,
"learning_rate": 5.957457842629879e-06,
"loss": 0.5732,
"step": 7190
},
{
"epoch": 0.46,
"grad_norm": 0.8137858510017395,
"learning_rate": 5.956450815571567e-06,
"loss": 0.545,
"step": 7191
},
{
"epoch": 0.46,
"grad_norm": 0.8825639486312866,
"learning_rate": 5.955443748240439e-06,
"loss": 0.5388,
"step": 7192
},
{
"epoch": 0.46,
"grad_norm": 0.8979218006134033,
"learning_rate": 5.9544366406789e-06,
"loss": 0.6082,
"step": 7193
},
{
"epoch": 0.46,
"grad_norm": 0.8679836392402649,
"learning_rate": 5.953429492929352e-06,
"loss": 0.557,
"step": 7194
},
{
"epoch": 0.46,
"grad_norm": 0.9074422121047974,
"learning_rate": 5.952422305034206e-06,
"loss": 0.5523,
"step": 7195
},
{
"epoch": 0.46,
"grad_norm": 0.8395237326622009,
"learning_rate": 5.95141507703587e-06,
"loss": 0.5881,
"step": 7196
},
{
"epoch": 0.46,
"grad_norm": 0.8752167820930481,
"learning_rate": 5.9504078089767545e-06,
"loss": 0.6212,
"step": 7197
},
{
"epoch": 0.46,
"grad_norm": 0.9009909629821777,
"learning_rate": 5.949400500899272e-06,
"loss": 0.6038,
"step": 7198
},
{
"epoch": 0.46,
"grad_norm": 0.8272262215614319,
"learning_rate": 5.948393152845837e-06,
"loss": 0.492,
"step": 7199
},
{
"epoch": 0.46,
"grad_norm": 0.9322216510772705,
"learning_rate": 5.9473857648588665e-06,
"loss": 0.6591,
"step": 7200
},
{
"epoch": 0.46,
"grad_norm": 0.831906795501709,
"learning_rate": 5.9463783369807775e-06,
"loss": 0.601,
"step": 7201
},
{
"epoch": 0.46,
"grad_norm": 0.844941258430481,
"learning_rate": 5.945370869253987e-06,
"loss": 0.5914,
"step": 7202
},
{
"epoch": 0.46,
"grad_norm": 0.8600195050239563,
"learning_rate": 5.944363361720919e-06,
"loss": 0.6095,
"step": 7203
},
{
"epoch": 0.46,
"grad_norm": 0.8606268167495728,
"learning_rate": 5.943355814423996e-06,
"loss": 0.5522,
"step": 7204
},
{
"epoch": 0.46,
"grad_norm": 0.8842875361442566,
"learning_rate": 5.94234822740564e-06,
"loss": 0.6165,
"step": 7205
},
{
"epoch": 0.46,
"grad_norm": 0.8753437995910645,
"learning_rate": 5.941340600708279e-06,
"loss": 0.5917,
"step": 7206
},
{
"epoch": 0.46,
"grad_norm": 0.9226531386375427,
"learning_rate": 5.9403329343743385e-06,
"loss": 0.6226,
"step": 7207
},
{
"epoch": 0.46,
"grad_norm": 0.9380946755409241,
"learning_rate": 5.939325228446251e-06,
"loss": 0.6713,
"step": 7208
},
{
"epoch": 0.46,
"grad_norm": 0.8104608058929443,
"learning_rate": 5.938317482966446e-06,
"loss": 0.5834,
"step": 7209
},
{
"epoch": 0.46,
"grad_norm": 0.8702726364135742,
"learning_rate": 5.937309697977355e-06,
"loss": 0.5383,
"step": 7210
},
{
"epoch": 0.46,
"grad_norm": 0.8711553812026978,
"learning_rate": 5.936301873521414e-06,
"loss": 0.5848,
"step": 7211
},
{
"epoch": 0.46,
"grad_norm": 0.9385100603103638,
"learning_rate": 5.935294009641057e-06,
"loss": 0.6047,
"step": 7212
},
{
"epoch": 0.46,
"grad_norm": 0.8969570994377136,
"learning_rate": 5.934286106378724e-06,
"loss": 0.6154,
"step": 7213
},
{
"epoch": 0.46,
"grad_norm": 0.9219831824302673,
"learning_rate": 5.933278163776852e-06,
"loss": 0.62,
"step": 7214
},
{
"epoch": 0.46,
"grad_norm": 0.9561776518821716,
"learning_rate": 5.932270181877886e-06,
"loss": 0.6364,
"step": 7215
},
{
"epoch": 0.46,
"grad_norm": 0.8352993130683899,
"learning_rate": 5.9312621607242625e-06,
"loss": 0.5626,
"step": 7216
},
{
"epoch": 0.46,
"grad_norm": 0.8720530271530151,
"learning_rate": 5.93025410035843e-06,
"loss": 0.569,
"step": 7217
},
{
"epoch": 0.46,
"grad_norm": 0.8734807372093201,
"learning_rate": 5.929246000822835e-06,
"loss": 0.6127,
"step": 7218
},
{
"epoch": 0.46,
"grad_norm": 0.9370132088661194,
"learning_rate": 5.928237862159922e-06,
"loss": 0.6122,
"step": 7219
},
{
"epoch": 0.46,
"grad_norm": 0.918322741985321,
"learning_rate": 5.927229684412143e-06,
"loss": 0.6148,
"step": 7220
},
{
"epoch": 0.46,
"grad_norm": 0.8534547090530396,
"learning_rate": 5.926221467621945e-06,
"loss": 0.5618,
"step": 7221
},
{
"epoch": 0.46,
"grad_norm": 0.8477325439453125,
"learning_rate": 5.925213211831785e-06,
"loss": 0.5562,
"step": 7222
},
{
"epoch": 0.46,
"grad_norm": 0.881864070892334,
"learning_rate": 5.924204917084116e-06,
"loss": 0.5994,
"step": 7223
},
{
"epoch": 0.46,
"grad_norm": 0.8880230784416199,
"learning_rate": 5.923196583421392e-06,
"loss": 0.5846,
"step": 7224
},
{
"epoch": 0.46,
"grad_norm": 0.8969435691833496,
"learning_rate": 5.922188210886071e-06,
"loss": 0.57,
"step": 7225
},
{
"epoch": 0.46,
"grad_norm": 0.8515308499336243,
"learning_rate": 5.921179799520613e-06,
"loss": 0.6143,
"step": 7226
},
{
"epoch": 0.46,
"grad_norm": 0.9298543334007263,
"learning_rate": 5.920171349367478e-06,
"loss": 0.6082,
"step": 7227
},
{
"epoch": 0.46,
"grad_norm": 0.8796992301940918,
"learning_rate": 5.919162860469129e-06,
"loss": 0.5899,
"step": 7228
},
{
"epoch": 0.46,
"grad_norm": 0.8747338056564331,
"learning_rate": 5.9181543328680295e-06,
"loss": 0.6593,
"step": 7229
},
{
"epoch": 0.46,
"grad_norm": 0.9241954684257507,
"learning_rate": 5.917145766606645e-06,
"loss": 0.6421,
"step": 7230
},
{
"epoch": 0.46,
"grad_norm": 0.8967227935791016,
"learning_rate": 5.9161371617274425e-06,
"loss": 0.5775,
"step": 7231
},
{
"epoch": 0.46,
"grad_norm": 0.8867378830909729,
"learning_rate": 5.91512851827289e-06,
"loss": 0.5834,
"step": 7232
},
{
"epoch": 0.46,
"grad_norm": 0.8239137530326843,
"learning_rate": 5.914119836285461e-06,
"loss": 0.5708,
"step": 7233
},
{
"epoch": 0.46,
"grad_norm": 0.8044520020484924,
"learning_rate": 5.913111115807626e-06,
"loss": 0.6005,
"step": 7234
},
{
"epoch": 0.46,
"grad_norm": 0.8815402388572693,
"learning_rate": 5.912102356881857e-06,
"loss": 0.5627,
"step": 7235
},
{
"epoch": 0.46,
"grad_norm": 0.9280535578727722,
"learning_rate": 5.91109355955063e-06,
"loss": 0.5705,
"step": 7236
},
{
"epoch": 0.46,
"grad_norm": 0.8546304702758789,
"learning_rate": 5.910084723856424e-06,
"loss": 0.5808,
"step": 7237
},
{
"epoch": 0.46,
"grad_norm": 0.8888003826141357,
"learning_rate": 5.909075849841717e-06,
"loss": 0.596,
"step": 7238
},
{
"epoch": 0.46,
"grad_norm": 0.8333981037139893,
"learning_rate": 5.908066937548987e-06,
"loss": 0.5282,
"step": 7239
},
{
"epoch": 0.46,
"grad_norm": 0.8448134660720825,
"learning_rate": 5.907057987020717e-06,
"loss": 0.5851,
"step": 7240
},
{
"epoch": 0.46,
"grad_norm": 0.8403011560440063,
"learning_rate": 5.906048998299392e-06,
"loss": 0.5617,
"step": 7241
},
{
"epoch": 0.46,
"grad_norm": 0.8628389835357666,
"learning_rate": 5.905039971427494e-06,
"loss": 0.621,
"step": 7242
},
{
"epoch": 0.46,
"grad_norm": 0.9215841889381409,
"learning_rate": 5.9040309064475136e-06,
"loss": 0.6134,
"step": 7243
},
{
"epoch": 0.46,
"grad_norm": 0.858690083026886,
"learning_rate": 5.903021803401933e-06,
"loss": 0.5893,
"step": 7244
},
{
"epoch": 0.46,
"grad_norm": 0.8196396231651306,
"learning_rate": 5.902012662333248e-06,
"loss": 0.5492,
"step": 7245
},
{
"epoch": 0.46,
"grad_norm": 0.8949219584465027,
"learning_rate": 5.9010034832839466e-06,
"loss": 0.6423,
"step": 7246
},
{
"epoch": 0.46,
"grad_norm": 0.9517080187797546,
"learning_rate": 5.899994266296525e-06,
"loss": 0.6048,
"step": 7247
},
{
"epoch": 0.46,
"grad_norm": 0.8856121897697449,
"learning_rate": 5.898985011413473e-06,
"loss": 0.549,
"step": 7248
},
{
"epoch": 0.46,
"grad_norm": 0.8450676798820496,
"learning_rate": 5.897975718677291e-06,
"loss": 0.5636,
"step": 7249
},
{
"epoch": 0.46,
"grad_norm": 0.8568273782730103,
"learning_rate": 5.896966388130475e-06,
"loss": 0.5788,
"step": 7250
},
{
"epoch": 0.46,
"grad_norm": 0.8017422556877136,
"learning_rate": 5.895957019815526e-06,
"loss": 0.5543,
"step": 7251
},
{
"epoch": 0.46,
"grad_norm": 0.9004830718040466,
"learning_rate": 5.894947613774942e-06,
"loss": 0.5613,
"step": 7252
},
{
"epoch": 0.46,
"grad_norm": 0.7895128726959229,
"learning_rate": 5.8939381700512275e-06,
"loss": 0.5361,
"step": 7253
},
{
"epoch": 0.46,
"grad_norm": 0.8576557040214539,
"learning_rate": 5.892928688686887e-06,
"loss": 0.6323,
"step": 7254
},
{
"epoch": 0.46,
"grad_norm": 0.8445666432380676,
"learning_rate": 5.891919169724426e-06,
"loss": 0.5944,
"step": 7255
},
{
"epoch": 0.46,
"grad_norm": 0.8773793578147888,
"learning_rate": 5.890909613206351e-06,
"loss": 0.6197,
"step": 7256
},
{
"epoch": 0.46,
"grad_norm": 0.853339672088623,
"learning_rate": 5.889900019175171e-06,
"loss": 0.5519,
"step": 7257
},
{
"epoch": 0.46,
"grad_norm": 0.9311347007751465,
"learning_rate": 5.888890387673398e-06,
"loss": 0.5779,
"step": 7258
},
{
"epoch": 0.46,
"grad_norm": 0.823020339012146,
"learning_rate": 5.887880718743541e-06,
"loss": 0.5509,
"step": 7259
},
{
"epoch": 0.46,
"grad_norm": 0.8491629362106323,
"learning_rate": 5.886871012428117e-06,
"loss": 0.5738,
"step": 7260
},
{
"epoch": 0.46,
"grad_norm": 0.8535467982292175,
"learning_rate": 5.885861268769641e-06,
"loss": 0.5945,
"step": 7261
},
{
"epoch": 0.46,
"grad_norm": 0.8396848440170288,
"learning_rate": 5.8848514878106275e-06,
"loss": 0.5535,
"step": 7262
},
{
"epoch": 0.46,
"grad_norm": 0.9186087250709534,
"learning_rate": 5.883841669593595e-06,
"loss": 0.6172,
"step": 7263
},
{
"epoch": 0.46,
"grad_norm": 0.877123236656189,
"learning_rate": 5.882831814161065e-06,
"loss": 0.6176,
"step": 7264
},
{
"epoch": 0.46,
"grad_norm": 0.8525793552398682,
"learning_rate": 5.881821921555559e-06,
"loss": 0.5688,
"step": 7265
},
{
"epoch": 0.46,
"grad_norm": 0.8865832090377808,
"learning_rate": 5.880811991819601e-06,
"loss": 0.6026,
"step": 7266
},
{
"epoch": 0.46,
"grad_norm": 0.9039906859397888,
"learning_rate": 5.879802024995712e-06,
"loss": 0.6023,
"step": 7267
},
{
"epoch": 0.46,
"grad_norm": 0.9067119359970093,
"learning_rate": 5.878792021126421e-06,
"loss": 0.6153,
"step": 7268
},
{
"epoch": 0.46,
"grad_norm": 0.8938568830490112,
"learning_rate": 5.877781980254255e-06,
"loss": 0.6366,
"step": 7269
},
{
"epoch": 0.46,
"grad_norm": 0.8254221081733704,
"learning_rate": 5.876771902421743e-06,
"loss": 0.5323,
"step": 7270
},
{
"epoch": 0.46,
"grad_norm": 0.9549217820167542,
"learning_rate": 5.875761787671416e-06,
"loss": 0.6151,
"step": 7271
},
{
"epoch": 0.46,
"grad_norm": 0.8977713584899902,
"learning_rate": 5.874751636045808e-06,
"loss": 0.5451,
"step": 7272
},
{
"epoch": 0.46,
"grad_norm": 0.8796578645706177,
"learning_rate": 5.873741447587451e-06,
"loss": 0.5895,
"step": 7273
},
{
"epoch": 0.46,
"grad_norm": 0.8962649703025818,
"learning_rate": 5.8727312223388814e-06,
"loss": 0.632,
"step": 7274
},
{
"epoch": 0.46,
"grad_norm": 0.8637465238571167,
"learning_rate": 5.871720960342635e-06,
"loss": 0.6002,
"step": 7275
},
{
"epoch": 0.46,
"grad_norm": 0.8970744013786316,
"learning_rate": 5.870710661641252e-06,
"loss": 0.551,
"step": 7276
},
{
"epoch": 0.46,
"grad_norm": 0.8728744387626648,
"learning_rate": 5.869700326277273e-06,
"loss": 0.6214,
"step": 7277
},
{
"epoch": 0.46,
"grad_norm": 0.8638222217559814,
"learning_rate": 5.868689954293239e-06,
"loss": 0.583,
"step": 7278
},
{
"epoch": 0.46,
"grad_norm": 0.8397230505943298,
"learning_rate": 5.86767954573169e-06,
"loss": 0.5571,
"step": 7279
},
{
"epoch": 0.46,
"grad_norm": 0.921284556388855,
"learning_rate": 5.866669100635176e-06,
"loss": 0.6216,
"step": 7280
},
{
"epoch": 0.46,
"grad_norm": 0.9188320636749268,
"learning_rate": 5.865658619046242e-06,
"loss": 0.5926,
"step": 7281
},
{
"epoch": 0.46,
"grad_norm": 0.831984281539917,
"learning_rate": 5.864648101007433e-06,
"loss": 0.5531,
"step": 7282
},
{
"epoch": 0.46,
"grad_norm": 0.8987353444099426,
"learning_rate": 5.863637546561301e-06,
"loss": 0.5643,
"step": 7283
},
{
"epoch": 0.46,
"grad_norm": 0.9372497200965881,
"learning_rate": 5.862626955750397e-06,
"loss": 0.6315,
"step": 7284
},
{
"epoch": 0.46,
"grad_norm": 0.8938281536102295,
"learning_rate": 5.8616163286172726e-06,
"loss": 0.6466,
"step": 7285
},
{
"epoch": 0.46,
"grad_norm": 0.8691145777702332,
"learning_rate": 5.8606056652044805e-06,
"loss": 0.5714,
"step": 7286
},
{
"epoch": 0.46,
"grad_norm": 0.9212241172790527,
"learning_rate": 5.859594965554579e-06,
"loss": 0.6383,
"step": 7287
},
{
"epoch": 0.46,
"grad_norm": 0.8839470148086548,
"learning_rate": 5.858584229710124e-06,
"loss": 0.6086,
"step": 7288
},
{
"epoch": 0.46,
"grad_norm": 0.8715324401855469,
"learning_rate": 5.857573457713674e-06,
"loss": 0.5746,
"step": 7289
},
{
"epoch": 0.46,
"grad_norm": 0.8384736776351929,
"learning_rate": 5.856562649607788e-06,
"loss": 0.6008,
"step": 7290
},
{
"epoch": 0.46,
"grad_norm": 0.9453056454658508,
"learning_rate": 5.855551805435028e-06,
"loss": 0.589,
"step": 7291
},
{
"epoch": 0.46,
"grad_norm": 0.8720511198043823,
"learning_rate": 5.854540925237959e-06,
"loss": 0.622,
"step": 7292
},
{
"epoch": 0.46,
"grad_norm": 0.950019896030426,
"learning_rate": 5.853530009059144e-06,
"loss": 0.6021,
"step": 7293
},
{
"epoch": 0.46,
"grad_norm": 0.9160835146903992,
"learning_rate": 5.852519056941149e-06,
"loss": 0.5935,
"step": 7294
},
{
"epoch": 0.46,
"grad_norm": 0.8492597937583923,
"learning_rate": 5.851508068926542e-06,
"loss": 0.5688,
"step": 7295
},
{
"epoch": 0.46,
"grad_norm": 0.947134792804718,
"learning_rate": 5.850497045057895e-06,
"loss": 0.6288,
"step": 7296
},
{
"epoch": 0.46,
"grad_norm": 0.8388863205909729,
"learning_rate": 5.849485985377774e-06,
"loss": 0.6021,
"step": 7297
},
{
"epoch": 0.46,
"grad_norm": 0.886326014995575,
"learning_rate": 5.848474889928753e-06,
"loss": 0.5793,
"step": 7298
},
{
"epoch": 0.46,
"grad_norm": 0.9255046248435974,
"learning_rate": 5.8474637587534065e-06,
"loss": 0.5786,
"step": 7299
},
{
"epoch": 0.46,
"grad_norm": 0.9360898733139038,
"learning_rate": 5.84645259189431e-06,
"loss": 0.6297,
"step": 7300
},
{
"epoch": 0.46,
"grad_norm": 0.9143325686454773,
"learning_rate": 5.845441389394039e-06,
"loss": 0.631,
"step": 7301
},
{
"epoch": 0.46,
"grad_norm": 0.8604922294616699,
"learning_rate": 5.844430151295171e-06,
"loss": 0.5886,
"step": 7302
},
{
"epoch": 0.46,
"grad_norm": 0.8875581622123718,
"learning_rate": 5.843418877640289e-06,
"loss": 0.6584,
"step": 7303
},
{
"epoch": 0.46,
"grad_norm": 0.8978346586227417,
"learning_rate": 5.842407568471971e-06,
"loss": 0.6007,
"step": 7304
},
{
"epoch": 0.46,
"grad_norm": 0.8847493529319763,
"learning_rate": 5.8413962238328e-06,
"loss": 0.5763,
"step": 7305
},
{
"epoch": 0.46,
"grad_norm": 0.8711661696434021,
"learning_rate": 5.840384843765361e-06,
"loss": 0.5876,
"step": 7306
},
{
"epoch": 0.46,
"grad_norm": 1.0056418180465698,
"learning_rate": 5.839373428312242e-06,
"loss": 0.5685,
"step": 7307
},
{
"epoch": 0.46,
"grad_norm": 0.9755017161369324,
"learning_rate": 5.838361977516026e-06,
"loss": 0.6263,
"step": 7308
},
{
"epoch": 0.46,
"grad_norm": 0.9591142535209656,
"learning_rate": 5.837350491419304e-06,
"loss": 0.6817,
"step": 7309
},
{
"epoch": 0.46,
"grad_norm": 0.8851566314697266,
"learning_rate": 5.836338970064664e-06,
"loss": 0.5556,
"step": 7310
},
{
"epoch": 0.46,
"grad_norm": 0.9328012466430664,
"learning_rate": 5.835327413494702e-06,
"loss": 0.634,
"step": 7311
},
{
"epoch": 0.46,
"grad_norm": 0.8392686247825623,
"learning_rate": 5.834315821752008e-06,
"loss": 0.609,
"step": 7312
},
{
"epoch": 0.46,
"grad_norm": 0.9183607697486877,
"learning_rate": 5.833304194879176e-06,
"loss": 0.6487,
"step": 7313
},
{
"epoch": 0.46,
"grad_norm": 0.8311749696731567,
"learning_rate": 5.832292532918804e-06,
"loss": 0.5665,
"step": 7314
},
{
"epoch": 0.46,
"grad_norm": 0.7938551902770996,
"learning_rate": 5.831280835913489e-06,
"loss": 0.5891,
"step": 7315
},
{
"epoch": 0.46,
"grad_norm": 0.8665035963058472,
"learning_rate": 5.83026910390583e-06,
"loss": 0.6339,
"step": 7316
},
{
"epoch": 0.46,
"grad_norm": 0.8929190039634705,
"learning_rate": 5.829257336938427e-06,
"loss": 0.6195,
"step": 7317
},
{
"epoch": 0.46,
"grad_norm": 0.948819637298584,
"learning_rate": 5.8282455350538815e-06,
"loss": 0.6272,
"step": 7318
},
{
"epoch": 0.46,
"grad_norm": 0.8194432854652405,
"learning_rate": 5.827233698294799e-06,
"loss": 0.6038,
"step": 7319
},
{
"epoch": 0.46,
"grad_norm": 0.8802596926689148,
"learning_rate": 5.826221826703783e-06,
"loss": 0.6913,
"step": 7320
},
{
"epoch": 0.46,
"grad_norm": 0.8479204177856445,
"learning_rate": 5.825209920323438e-06,
"loss": 0.5725,
"step": 7321
},
{
"epoch": 0.46,
"grad_norm": 0.8597609400749207,
"learning_rate": 5.824197979196377e-06,
"loss": 0.595,
"step": 7322
},
{
"epoch": 0.46,
"grad_norm": 0.874144434928894,
"learning_rate": 5.823186003365205e-06,
"loss": 0.6125,
"step": 7323
},
{
"epoch": 0.46,
"grad_norm": 0.904408872127533,
"learning_rate": 5.822173992872534e-06,
"loss": 0.5542,
"step": 7324
},
{
"epoch": 0.46,
"grad_norm": 0.8976813554763794,
"learning_rate": 5.821161947760975e-06,
"loss": 0.5595,
"step": 7325
},
{
"epoch": 0.46,
"grad_norm": 0.9583491683006287,
"learning_rate": 5.820149868073145e-06,
"loss": 0.7058,
"step": 7326
},
{
"epoch": 0.46,
"grad_norm": 0.8859381675720215,
"learning_rate": 5.819137753851656e-06,
"loss": 0.6169,
"step": 7327
},
{
"epoch": 0.46,
"grad_norm": 0.8623588681221008,
"learning_rate": 5.8181256051391276e-06,
"loss": 0.5796,
"step": 7328
},
{
"epoch": 0.46,
"grad_norm": 0.9704835414886475,
"learning_rate": 5.817113421978173e-06,
"loss": 0.6314,
"step": 7329
},
{
"epoch": 0.46,
"grad_norm": 0.8964600563049316,
"learning_rate": 5.816101204411417e-06,
"loss": 0.5712,
"step": 7330
},
{
"epoch": 0.46,
"grad_norm": 0.8039814233779907,
"learning_rate": 5.815088952481478e-06,
"loss": 0.5073,
"step": 7331
},
{
"epoch": 0.46,
"grad_norm": 0.8570845723152161,
"learning_rate": 5.814076666230978e-06,
"loss": 0.613,
"step": 7332
},
{
"epoch": 0.46,
"grad_norm": 0.8780226707458496,
"learning_rate": 5.813064345702542e-06,
"loss": 0.5941,
"step": 7333
},
{
"epoch": 0.46,
"grad_norm": 0.9203137159347534,
"learning_rate": 5.812051990938794e-06,
"loss": 0.5627,
"step": 7334
},
{
"epoch": 0.46,
"grad_norm": 0.9094358682632446,
"learning_rate": 5.811039601982363e-06,
"loss": 0.6046,
"step": 7335
},
{
"epoch": 0.46,
"grad_norm": 0.8867512345314026,
"learning_rate": 5.810027178875875e-06,
"loss": 0.5973,
"step": 7336
},
{
"epoch": 0.46,
"grad_norm": 0.8854659795761108,
"learning_rate": 5.809014721661961e-06,
"loss": 0.6152,
"step": 7337
},
{
"epoch": 0.46,
"grad_norm": 0.8991286158561707,
"learning_rate": 5.808002230383249e-06,
"loss": 0.6124,
"step": 7338
},
{
"epoch": 0.46,
"grad_norm": 0.8840140104293823,
"learning_rate": 5.806989705082377e-06,
"loss": 0.5494,
"step": 7339
},
{
"epoch": 0.47,
"grad_norm": 0.8739617466926575,
"learning_rate": 5.805977145801975e-06,
"loss": 0.6322,
"step": 7340
},
{
"epoch": 0.47,
"grad_norm": 0.8306631445884705,
"learning_rate": 5.8049645525846785e-06,
"loss": 0.526,
"step": 7341
},
{
"epoch": 0.47,
"grad_norm": 0.8292911648750305,
"learning_rate": 5.8039519254731245e-06,
"loss": 0.5709,
"step": 7342
},
{
"epoch": 0.47,
"grad_norm": 0.8418349623680115,
"learning_rate": 5.802939264509954e-06,
"loss": 0.6032,
"step": 7343
},
{
"epoch": 0.47,
"grad_norm": 0.906843364238739,
"learning_rate": 5.801926569737802e-06,
"loss": 0.5835,
"step": 7344
},
{
"epoch": 0.47,
"grad_norm": 0.8574205040931702,
"learning_rate": 5.800913841199312e-06,
"loss": 0.612,
"step": 7345
},
{
"epoch": 0.47,
"grad_norm": 0.9067574739456177,
"learning_rate": 5.799901078937127e-06,
"loss": 0.5773,
"step": 7346
},
{
"epoch": 0.47,
"grad_norm": 0.894777238368988,
"learning_rate": 5.798888282993891e-06,
"loss": 0.5373,
"step": 7347
},
{
"epoch": 0.47,
"grad_norm": 0.9085848331451416,
"learning_rate": 5.7978754534122465e-06,
"loss": 0.5839,
"step": 7348
},
{
"epoch": 0.47,
"grad_norm": 0.8634418249130249,
"learning_rate": 5.7968625902348445e-06,
"loss": 0.5919,
"step": 7349
},
{
"epoch": 0.47,
"grad_norm": 0.8625919818878174,
"learning_rate": 5.7958496935043296e-06,
"loss": 0.5809,
"step": 7350
},
{
"epoch": 0.47,
"grad_norm": 0.8647257089614868,
"learning_rate": 5.794836763263353e-06,
"loss": 0.6084,
"step": 7351
},
{
"epoch": 0.47,
"grad_norm": 0.877373456954956,
"learning_rate": 5.793823799554564e-06,
"loss": 0.5515,
"step": 7352
},
{
"epoch": 0.47,
"grad_norm": 0.8372183442115784,
"learning_rate": 5.792810802420618e-06,
"loss": 0.6313,
"step": 7353
},
{
"epoch": 0.47,
"grad_norm": 0.9380940198898315,
"learning_rate": 5.791797771904168e-06,
"loss": 0.5946,
"step": 7354
},
{
"epoch": 0.47,
"grad_norm": 0.9767922759056091,
"learning_rate": 5.790784708047866e-06,
"loss": 0.6452,
"step": 7355
},
{
"epoch": 0.47,
"grad_norm": 0.8654528856277466,
"learning_rate": 5.789771610894371e-06,
"loss": 0.5671,
"step": 7356
},
{
"epoch": 0.47,
"grad_norm": 0.8537338972091675,
"learning_rate": 5.7887584804863414e-06,
"loss": 0.5607,
"step": 7357
},
{
"epoch": 0.47,
"grad_norm": 0.879467785358429,
"learning_rate": 5.787745316866438e-06,
"loss": 0.5736,
"step": 7358
},
{
"epoch": 0.47,
"grad_norm": 0.8996354341506958,
"learning_rate": 5.786732120077318e-06,
"loss": 0.5326,
"step": 7359
},
{
"epoch": 0.47,
"grad_norm": 0.9491006135940552,
"learning_rate": 5.7857188901616444e-06,
"loss": 0.617,
"step": 7360
},
{
"epoch": 0.47,
"grad_norm": 0.9167372584342957,
"learning_rate": 5.7847056271620815e-06,
"loss": 0.5909,
"step": 7361
},
{
"epoch": 0.47,
"grad_norm": 0.88127201795578,
"learning_rate": 5.783692331121296e-06,
"loss": 0.6109,
"step": 7362
},
{
"epoch": 0.47,
"grad_norm": 0.9358324408531189,
"learning_rate": 5.7826790020819525e-06,
"loss": 0.6228,
"step": 7363
},
{
"epoch": 0.47,
"grad_norm": 0.8748879432678223,
"learning_rate": 5.781665640086719e-06,
"loss": 0.5388,
"step": 7364
},
{
"epoch": 0.47,
"grad_norm": 0.9955645203590393,
"learning_rate": 5.780652245178263e-06,
"loss": 0.5945,
"step": 7365
},
{
"epoch": 0.47,
"grad_norm": 1.0095912218093872,
"learning_rate": 5.779638817399259e-06,
"loss": 0.5464,
"step": 7366
},
{
"epoch": 0.47,
"grad_norm": 0.8746544718742371,
"learning_rate": 5.778625356792376e-06,
"loss": 0.5783,
"step": 7367
},
{
"epoch": 0.47,
"grad_norm": 0.8243803381919861,
"learning_rate": 5.7776118634002865e-06,
"loss": 0.4783,
"step": 7368
},
{
"epoch": 0.47,
"grad_norm": 0.9888680577278137,
"learning_rate": 5.776598337265668e-06,
"loss": 0.5734,
"step": 7369
},
{
"epoch": 0.47,
"grad_norm": 0.8647724986076355,
"learning_rate": 5.775584778431194e-06,
"loss": 0.6255,
"step": 7370
},
{
"epoch": 0.47,
"grad_norm": 0.9031562805175781,
"learning_rate": 5.774571186939543e-06,
"loss": 0.5906,
"step": 7371
},
{
"epoch": 0.47,
"grad_norm": 0.9837010502815247,
"learning_rate": 5.773557562833394e-06,
"loss": 0.6282,
"step": 7372
},
{
"epoch": 0.47,
"grad_norm": 0.9058972597122192,
"learning_rate": 5.772543906155429e-06,
"loss": 0.6202,
"step": 7373
},
{
"epoch": 0.47,
"grad_norm": 0.8994501233100891,
"learning_rate": 5.7715302169483254e-06,
"loss": 0.5639,
"step": 7374
},
{
"epoch": 0.47,
"grad_norm": 0.8851544260978699,
"learning_rate": 5.770516495254769e-06,
"loss": 0.6659,
"step": 7375
},
{
"epoch": 0.47,
"grad_norm": 0.93473881483078,
"learning_rate": 5.769502741117443e-06,
"loss": 0.6339,
"step": 7376
},
{
"epoch": 0.47,
"grad_norm": 0.8682790398597717,
"learning_rate": 5.7684889545790346e-06,
"loss": 0.5733,
"step": 7377
},
{
"epoch": 0.47,
"grad_norm": 0.9060890078544617,
"learning_rate": 5.767475135682228e-06,
"loss": 0.604,
"step": 7378
},
{
"epoch": 0.47,
"grad_norm": 0.862415075302124,
"learning_rate": 5.766461284469714e-06,
"loss": 0.6114,
"step": 7379
},
{
"epoch": 0.47,
"grad_norm": 0.9211068153381348,
"learning_rate": 5.765447400984182e-06,
"loss": 0.6212,
"step": 7380
},
{
"epoch": 0.47,
"grad_norm": 0.9584636092185974,
"learning_rate": 5.7644334852683236e-06,
"loss": 0.6299,
"step": 7381
},
{
"epoch": 0.47,
"grad_norm": 0.8537229299545288,
"learning_rate": 5.763419537364828e-06,
"loss": 0.5601,
"step": 7382
},
{
"epoch": 0.47,
"grad_norm": 0.9321445822715759,
"learning_rate": 5.762405557316393e-06,
"loss": 0.598,
"step": 7383
},
{
"epoch": 0.47,
"grad_norm": 0.9108582735061646,
"learning_rate": 5.761391545165713e-06,
"loss": 0.5775,
"step": 7384
},
{
"epoch": 0.47,
"grad_norm": 0.903030514717102,
"learning_rate": 5.760377500955483e-06,
"loss": 0.622,
"step": 7385
},
{
"epoch": 0.47,
"grad_norm": 0.88475102186203,
"learning_rate": 5.759363424728401e-06,
"loss": 0.6028,
"step": 7386
},
{
"epoch": 0.47,
"grad_norm": 0.952921986579895,
"learning_rate": 5.758349316527166e-06,
"loss": 0.6262,
"step": 7387
},
{
"epoch": 0.47,
"grad_norm": 0.867470383644104,
"learning_rate": 5.7573351763944815e-06,
"loss": 0.5876,
"step": 7388
},
{
"epoch": 0.47,
"grad_norm": 0.8601709604263306,
"learning_rate": 5.756321004373047e-06,
"loss": 0.5715,
"step": 7389
},
{
"epoch": 0.47,
"grad_norm": 0.910184919834137,
"learning_rate": 5.755306800505564e-06,
"loss": 0.6218,
"step": 7390
},
{
"epoch": 0.47,
"grad_norm": 0.9151275157928467,
"learning_rate": 5.754292564834741e-06,
"loss": 0.6137,
"step": 7391
},
{
"epoch": 0.47,
"grad_norm": 0.9013386964797974,
"learning_rate": 5.753278297403282e-06,
"loss": 0.6175,
"step": 7392
},
{
"epoch": 0.47,
"grad_norm": 0.8471426367759705,
"learning_rate": 5.752263998253893e-06,
"loss": 0.5546,
"step": 7393
},
{
"epoch": 0.47,
"grad_norm": 0.9509868025779724,
"learning_rate": 5.751249667429285e-06,
"loss": 0.6276,
"step": 7394
},
{
"epoch": 0.47,
"grad_norm": 0.9001954793930054,
"learning_rate": 5.7502353049721674e-06,
"loss": 0.595,
"step": 7395
},
{
"epoch": 0.47,
"grad_norm": 0.8812659382820129,
"learning_rate": 5.74922091092525e-06,
"loss": 0.5687,
"step": 7396
},
{
"epoch": 0.47,
"grad_norm": 0.8915801644325256,
"learning_rate": 5.748206485331247e-06,
"loss": 0.6137,
"step": 7397
},
{
"epoch": 0.47,
"grad_norm": 0.8607172966003418,
"learning_rate": 5.747192028232872e-06,
"loss": 0.5964,
"step": 7398
},
{
"epoch": 0.47,
"grad_norm": 0.9319098591804504,
"learning_rate": 5.746177539672841e-06,
"loss": 0.5956,
"step": 7399
},
{
"epoch": 0.47,
"grad_norm": 0.9447979927062988,
"learning_rate": 5.745163019693867e-06,
"loss": 0.5742,
"step": 7400
},
{
"epoch": 0.47,
"grad_norm": 0.9158990979194641,
"learning_rate": 5.744148468338671e-06,
"loss": 0.6162,
"step": 7401
},
{
"epoch": 0.47,
"grad_norm": 0.9429414868354797,
"learning_rate": 5.743133885649972e-06,
"loss": 0.6195,
"step": 7402
},
{
"epoch": 0.47,
"grad_norm": 0.8871979117393494,
"learning_rate": 5.742119271670491e-06,
"loss": 0.5716,
"step": 7403
},
{
"epoch": 0.47,
"grad_norm": 0.9028074741363525,
"learning_rate": 5.741104626442948e-06,
"loss": 0.6184,
"step": 7404
},
{
"epoch": 0.47,
"grad_norm": 0.8605913519859314,
"learning_rate": 5.740089950010068e-06,
"loss": 0.5238,
"step": 7405
},
{
"epoch": 0.47,
"grad_norm": 0.8414510488510132,
"learning_rate": 5.739075242414575e-06,
"loss": 0.5995,
"step": 7406
},
{
"epoch": 0.47,
"grad_norm": 0.851617157459259,
"learning_rate": 5.738060503699194e-06,
"loss": 0.5505,
"step": 7407
},
{
"epoch": 0.47,
"grad_norm": 0.9584022164344788,
"learning_rate": 5.737045733906653e-06,
"loss": 0.5901,
"step": 7408
},
{
"epoch": 0.47,
"grad_norm": 0.9968786239624023,
"learning_rate": 5.7360309330796805e-06,
"loss": 0.6653,
"step": 7409
},
{
"epoch": 0.47,
"grad_norm": 0.9742656350135803,
"learning_rate": 5.735016101261005e-06,
"loss": 0.6075,
"step": 7410
},
{
"epoch": 0.47,
"grad_norm": 0.8474623560905457,
"learning_rate": 5.7340012384933595e-06,
"loss": 0.6144,
"step": 7411
},
{
"epoch": 0.47,
"grad_norm": 0.9446218609809875,
"learning_rate": 5.732986344819475e-06,
"loss": 0.6079,
"step": 7412
},
{
"epoch": 0.47,
"grad_norm": 0.842605710029602,
"learning_rate": 5.731971420282085e-06,
"loss": 0.5098,
"step": 7413
},
{
"epoch": 0.47,
"grad_norm": 0.9913969039916992,
"learning_rate": 5.730956464923926e-06,
"loss": 0.594,
"step": 7414
},
{
"epoch": 0.47,
"grad_norm": 0.8629537224769592,
"learning_rate": 5.729941478787732e-06,
"loss": 0.5961,
"step": 7415
},
{
"epoch": 0.47,
"grad_norm": 0.897515058517456,
"learning_rate": 5.728926461916242e-06,
"loss": 0.5481,
"step": 7416
},
{
"epoch": 0.47,
"grad_norm": 0.8936898708343506,
"learning_rate": 5.727911414352192e-06,
"loss": 0.5766,
"step": 7417
},
{
"epoch": 0.47,
"grad_norm": 0.936795711517334,
"learning_rate": 5.726896336138328e-06,
"loss": 0.6159,
"step": 7418
},
{
"epoch": 0.47,
"grad_norm": 0.83265620470047,
"learning_rate": 5.725881227317386e-06,
"loss": 0.5623,
"step": 7419
},
{
"epoch": 0.47,
"grad_norm": 0.8322146534919739,
"learning_rate": 5.724866087932113e-06,
"loss": 0.5664,
"step": 7420
},
{
"epoch": 0.47,
"grad_norm": 0.8756260871887207,
"learning_rate": 5.723850918025246e-06,
"loss": 0.5767,
"step": 7421
},
{
"epoch": 0.47,
"grad_norm": 0.9313575029373169,
"learning_rate": 5.722835717639539e-06,
"loss": 0.5878,
"step": 7422
},
{
"epoch": 0.47,
"grad_norm": 0.9568567872047424,
"learning_rate": 5.721820486817733e-06,
"loss": 0.5955,
"step": 7423
},
{
"epoch": 0.47,
"grad_norm": 0.8222607970237732,
"learning_rate": 5.720805225602579e-06,
"loss": 0.5706,
"step": 7424
},
{
"epoch": 0.47,
"grad_norm": 0.8587454557418823,
"learning_rate": 5.719789934036821e-06,
"loss": 0.5963,
"step": 7425
},
{
"epoch": 0.47,
"grad_norm": 0.8979743123054504,
"learning_rate": 5.718774612163216e-06,
"loss": 0.5787,
"step": 7426
},
{
"epoch": 0.47,
"grad_norm": 0.8900598883628845,
"learning_rate": 5.717759260024511e-06,
"loss": 0.6332,
"step": 7427
},
{
"epoch": 0.47,
"grad_norm": 0.8718476891517639,
"learning_rate": 5.716743877663462e-06,
"loss": 0.5851,
"step": 7428
},
{
"epoch": 0.47,
"grad_norm": 0.9177529215812683,
"learning_rate": 5.715728465122821e-06,
"loss": 0.6083,
"step": 7429
},
{
"epoch": 0.47,
"grad_norm": 0.9315070509910583,
"learning_rate": 5.714713022445344e-06,
"loss": 0.6009,
"step": 7430
},
{
"epoch": 0.47,
"grad_norm": 0.8802310228347778,
"learning_rate": 5.713697549673788e-06,
"loss": 0.5769,
"step": 7431
},
{
"epoch": 0.47,
"grad_norm": 0.9262186884880066,
"learning_rate": 5.712682046850909e-06,
"loss": 0.5989,
"step": 7432
},
{
"epoch": 0.47,
"grad_norm": 0.939360499382019,
"learning_rate": 5.711666514019472e-06,
"loss": 0.6666,
"step": 7433
},
{
"epoch": 0.47,
"grad_norm": 0.8618243336677551,
"learning_rate": 5.710650951222231e-06,
"loss": 0.5733,
"step": 7434
},
{
"epoch": 0.47,
"grad_norm": 0.8601074814796448,
"learning_rate": 5.709635358501952e-06,
"loss": 0.5871,
"step": 7435
},
{
"epoch": 0.47,
"grad_norm": 0.9362636804580688,
"learning_rate": 5.708619735901394e-06,
"loss": 0.573,
"step": 7436
},
{
"epoch": 0.47,
"grad_norm": 0.8923870325088501,
"learning_rate": 5.707604083463327e-06,
"loss": 0.5884,
"step": 7437
},
{
"epoch": 0.47,
"grad_norm": 0.9044986367225647,
"learning_rate": 5.706588401230513e-06,
"loss": 0.573,
"step": 7438
},
{
"epoch": 0.47,
"grad_norm": 0.8761860728263855,
"learning_rate": 5.70557268924572e-06,
"loss": 0.6136,
"step": 7439
},
{
"epoch": 0.47,
"grad_norm": 0.8322923183441162,
"learning_rate": 5.7045569475517126e-06,
"loss": 0.5395,
"step": 7440
},
{
"epoch": 0.47,
"grad_norm": 0.9440213441848755,
"learning_rate": 5.703541176191266e-06,
"loss": 0.6287,
"step": 7441
},
{
"epoch": 0.47,
"grad_norm": 0.815701425075531,
"learning_rate": 5.702525375207147e-06,
"loss": 0.536,
"step": 7442
},
{
"epoch": 0.47,
"grad_norm": 0.9086024761199951,
"learning_rate": 5.70150954464213e-06,
"loss": 0.537,
"step": 7443
},
{
"epoch": 0.47,
"grad_norm": 0.9420557618141174,
"learning_rate": 5.700493684538984e-06,
"loss": 0.5822,
"step": 7444
},
{
"epoch": 0.47,
"grad_norm": 0.8448433876037598,
"learning_rate": 5.699477794940487e-06,
"loss": 0.599,
"step": 7445
},
{
"epoch": 0.47,
"grad_norm": 0.8653044104576111,
"learning_rate": 5.698461875889414e-06,
"loss": 0.5989,
"step": 7446
},
{
"epoch": 0.47,
"grad_norm": 0.8957589864730835,
"learning_rate": 5.6974459274285395e-06,
"loss": 0.6335,
"step": 7447
},
{
"epoch": 0.47,
"grad_norm": 0.8396274447441101,
"learning_rate": 5.696429949600643e-06,
"loss": 0.5672,
"step": 7448
},
{
"epoch": 0.47,
"grad_norm": 0.900188148021698,
"learning_rate": 5.695413942448505e-06,
"loss": 0.5975,
"step": 7449
},
{
"epoch": 0.47,
"grad_norm": 0.8910940289497375,
"learning_rate": 5.694397906014907e-06,
"loss": 0.5868,
"step": 7450
},
{
"epoch": 0.47,
"grad_norm": 0.8663356900215149,
"learning_rate": 5.693381840342626e-06,
"loss": 0.5789,
"step": 7451
},
{
"epoch": 0.47,
"grad_norm": 0.8898122310638428,
"learning_rate": 5.692365745474448e-06,
"loss": 0.595,
"step": 7452
},
{
"epoch": 0.47,
"grad_norm": 0.7971364259719849,
"learning_rate": 5.691349621453158e-06,
"loss": 0.5529,
"step": 7453
},
{
"epoch": 0.47,
"grad_norm": 0.813381552696228,
"learning_rate": 5.6903334683215416e-06,
"loss": 0.5635,
"step": 7454
},
{
"epoch": 0.47,
"grad_norm": 0.9315560460090637,
"learning_rate": 5.689317286122382e-06,
"loss": 0.6557,
"step": 7455
},
{
"epoch": 0.47,
"grad_norm": 0.9134712815284729,
"learning_rate": 5.68830107489847e-06,
"loss": 0.5853,
"step": 7456
},
{
"epoch": 0.47,
"grad_norm": 0.966414749622345,
"learning_rate": 5.687284834692595e-06,
"loss": 0.5889,
"step": 7457
},
{
"epoch": 0.47,
"grad_norm": 0.9134582281112671,
"learning_rate": 5.686268565547547e-06,
"loss": 0.6798,
"step": 7458
},
{
"epoch": 0.47,
"grad_norm": 0.8649297952651978,
"learning_rate": 5.685252267506116e-06,
"loss": 0.5932,
"step": 7459
},
{
"epoch": 0.47,
"grad_norm": 0.94404536485672,
"learning_rate": 5.6842359406110945e-06,
"loss": 0.6098,
"step": 7460
},
{
"epoch": 0.47,
"grad_norm": 0.8642643094062805,
"learning_rate": 5.683219584905281e-06,
"loss": 0.5948,
"step": 7461
},
{
"epoch": 0.47,
"grad_norm": 0.9007630944252014,
"learning_rate": 5.682203200431465e-06,
"loss": 0.5938,
"step": 7462
},
{
"epoch": 0.47,
"grad_norm": 0.901642918586731,
"learning_rate": 5.6811867872324465e-06,
"loss": 0.6043,
"step": 7463
},
{
"epoch": 0.47,
"grad_norm": 0.9297770857810974,
"learning_rate": 5.680170345351021e-06,
"loss": 0.6118,
"step": 7464
},
{
"epoch": 0.47,
"grad_norm": 0.9477009177207947,
"learning_rate": 5.67915387482999e-06,
"loss": 0.6211,
"step": 7465
},
{
"epoch": 0.47,
"grad_norm": 0.9416295289993286,
"learning_rate": 5.678137375712152e-06,
"loss": 0.6327,
"step": 7466
},
{
"epoch": 0.47,
"grad_norm": 0.8348994851112366,
"learning_rate": 5.6771208480403065e-06,
"loss": 0.5896,
"step": 7467
},
{
"epoch": 0.47,
"grad_norm": 0.8662605285644531,
"learning_rate": 5.6761042918572585e-06,
"loss": 0.6347,
"step": 7468
},
{
"epoch": 0.47,
"grad_norm": 0.9073401689529419,
"learning_rate": 5.675087707205811e-06,
"loss": 0.5953,
"step": 7469
},
{
"epoch": 0.47,
"grad_norm": 0.8502627015113831,
"learning_rate": 5.674071094128768e-06,
"loss": 0.56,
"step": 7470
},
{
"epoch": 0.47,
"grad_norm": 0.8814505934715271,
"learning_rate": 5.673054452668936e-06,
"loss": 0.5533,
"step": 7471
},
{
"epoch": 0.47,
"grad_norm": 0.8160227537155151,
"learning_rate": 5.672037782869123e-06,
"loss": 0.5823,
"step": 7472
},
{
"epoch": 0.47,
"grad_norm": 0.9062105417251587,
"learning_rate": 5.671021084772137e-06,
"loss": 0.629,
"step": 7473
},
{
"epoch": 0.47,
"grad_norm": 0.8528299927711487,
"learning_rate": 5.670004358420786e-06,
"loss": 0.5908,
"step": 7474
},
{
"epoch": 0.47,
"grad_norm": 0.8880297541618347,
"learning_rate": 5.668987603857884e-06,
"loss": 0.5729,
"step": 7475
},
{
"epoch": 0.47,
"grad_norm": 0.8712710738182068,
"learning_rate": 5.6679708211262415e-06,
"loss": 0.5703,
"step": 7476
},
{
"epoch": 0.47,
"grad_norm": 0.8730039596557617,
"learning_rate": 5.66695401026867e-06,
"loss": 0.5824,
"step": 7477
},
{
"epoch": 0.47,
"grad_norm": 0.9115322232246399,
"learning_rate": 5.665937171327985e-06,
"loss": 0.6206,
"step": 7478
},
{
"epoch": 0.47,
"grad_norm": 0.8727670311927795,
"learning_rate": 5.664920304347004e-06,
"loss": 0.5998,
"step": 7479
},
{
"epoch": 0.47,
"grad_norm": 0.8780418634414673,
"learning_rate": 5.6639034093685416e-06,
"loss": 0.6111,
"step": 7480
},
{
"epoch": 0.47,
"grad_norm": 0.902927577495575,
"learning_rate": 5.662886486435415e-06,
"loss": 0.5843,
"step": 7481
},
{
"epoch": 0.47,
"grad_norm": 0.9045441746711731,
"learning_rate": 5.6618695355904456e-06,
"loss": 0.5906,
"step": 7482
},
{
"epoch": 0.47,
"grad_norm": 0.8809554576873779,
"learning_rate": 5.660852556876452e-06,
"loss": 0.5882,
"step": 7483
},
{
"epoch": 0.47,
"grad_norm": 0.8581720590591431,
"learning_rate": 5.659835550336257e-06,
"loss": 0.5565,
"step": 7484
},
{
"epoch": 0.47,
"grad_norm": 0.836743175983429,
"learning_rate": 5.658818516012681e-06,
"loss": 0.6018,
"step": 7485
},
{
"epoch": 0.47,
"grad_norm": 0.8696823716163635,
"learning_rate": 5.65780145394855e-06,
"loss": 0.5717,
"step": 7486
},
{
"epoch": 0.47,
"grad_norm": 0.8720874786376953,
"learning_rate": 5.656784364186687e-06,
"loss": 0.6145,
"step": 7487
},
{
"epoch": 0.47,
"grad_norm": 0.8161273002624512,
"learning_rate": 5.655767246769921e-06,
"loss": 0.5921,
"step": 7488
},
{
"epoch": 0.47,
"grad_norm": 0.9067077040672302,
"learning_rate": 5.6547501017410765e-06,
"loss": 0.5551,
"step": 7489
},
{
"epoch": 0.47,
"grad_norm": 0.8600305318832397,
"learning_rate": 5.6537329291429835e-06,
"loss": 0.5712,
"step": 7490
},
{
"epoch": 0.47,
"grad_norm": 0.9493293762207031,
"learning_rate": 5.652715729018471e-06,
"loss": 0.5631,
"step": 7491
},
{
"epoch": 0.47,
"grad_norm": 0.9080491065979004,
"learning_rate": 5.65169850141037e-06,
"loss": 0.6021,
"step": 7492
},
{
"epoch": 0.47,
"grad_norm": 0.8665675520896912,
"learning_rate": 5.650681246361511e-06,
"loss": 0.5662,
"step": 7493
},
{
"epoch": 0.47,
"grad_norm": 0.8644539713859558,
"learning_rate": 5.649663963914729e-06,
"loss": 0.5776,
"step": 7494
},
{
"epoch": 0.47,
"grad_norm": 0.8949490785598755,
"learning_rate": 5.6486466541128575e-06,
"loss": 0.6025,
"step": 7495
},
{
"epoch": 0.47,
"grad_norm": 0.8618309497833252,
"learning_rate": 5.6476293169987314e-06,
"loss": 0.5809,
"step": 7496
},
{
"epoch": 0.47,
"grad_norm": 0.9121665954589844,
"learning_rate": 5.646611952615188e-06,
"loss": 0.5776,
"step": 7497
},
{
"epoch": 0.48,
"grad_norm": 0.9601690173149109,
"learning_rate": 5.645594561005064e-06,
"loss": 0.6445,
"step": 7498
},
{
"epoch": 0.48,
"grad_norm": 0.8770933151245117,
"learning_rate": 5.6445771422112005e-06,
"loss": 0.5561,
"step": 7499
},
{
"epoch": 0.48,
"grad_norm": 0.9160272479057312,
"learning_rate": 5.643559696276435e-06,
"loss": 0.5923,
"step": 7500
},
{
"epoch": 0.48,
"grad_norm": 0.8537315726280212,
"learning_rate": 5.6425422232436085e-06,
"loss": 0.5297,
"step": 7501
},
{
"epoch": 0.48,
"grad_norm": 0.9248793125152588,
"learning_rate": 5.641524723155566e-06,
"loss": 0.6002,
"step": 7502
},
{
"epoch": 0.48,
"grad_norm": 0.8903481960296631,
"learning_rate": 5.6405071960551485e-06,
"loss": 0.5629,
"step": 7503
},
{
"epoch": 0.48,
"grad_norm": 0.8650072813034058,
"learning_rate": 5.639489641985201e-06,
"loss": 0.6217,
"step": 7504
},
{
"epoch": 0.48,
"grad_norm": 0.9362791180610657,
"learning_rate": 5.638472060988569e-06,
"loss": 0.6735,
"step": 7505
},
{
"epoch": 0.48,
"grad_norm": 0.8435890078544617,
"learning_rate": 5.6374544531081e-06,
"loss": 0.5357,
"step": 7506
},
{
"epoch": 0.48,
"grad_norm": 0.8968993425369263,
"learning_rate": 5.636436818386641e-06,
"loss": 0.6284,
"step": 7507
},
{
"epoch": 0.48,
"grad_norm": 0.8205461502075195,
"learning_rate": 5.635419156867043e-06,
"loss": 0.5537,
"step": 7508
},
{
"epoch": 0.48,
"grad_norm": 0.8897349238395691,
"learning_rate": 5.634401468592152e-06,
"loss": 0.5937,
"step": 7509
},
{
"epoch": 0.48,
"grad_norm": 0.8807753920555115,
"learning_rate": 5.6333837536048255e-06,
"loss": 0.5546,
"step": 7510
},
{
"epoch": 0.48,
"grad_norm": 0.8418203592300415,
"learning_rate": 5.63236601194791e-06,
"loss": 0.5596,
"step": 7511
},
{
"epoch": 0.48,
"grad_norm": 0.849711000919342,
"learning_rate": 5.631348243664263e-06,
"loss": 0.6252,
"step": 7512
},
{
"epoch": 0.48,
"grad_norm": 0.9313555955886841,
"learning_rate": 5.630330448796736e-06,
"loss": 0.6325,
"step": 7513
},
{
"epoch": 0.48,
"grad_norm": 0.9534391164779663,
"learning_rate": 5.629312627388188e-06,
"loss": 0.6615,
"step": 7514
},
{
"epoch": 0.48,
"grad_norm": 0.8927303552627563,
"learning_rate": 5.628294779481474e-06,
"loss": 0.5842,
"step": 7515
},
{
"epoch": 0.48,
"grad_norm": 0.8985401391983032,
"learning_rate": 5.6272769051194535e-06,
"loss": 0.6764,
"step": 7516
},
{
"epoch": 0.48,
"grad_norm": 0.8968629240989685,
"learning_rate": 5.626259004344983e-06,
"loss": 0.5754,
"step": 7517
},
{
"epoch": 0.48,
"grad_norm": 0.8578152656555176,
"learning_rate": 5.625241077200926e-06,
"loss": 0.6018,
"step": 7518
},
{
"epoch": 0.48,
"grad_norm": 0.8742191791534424,
"learning_rate": 5.624223123730141e-06,
"loss": 0.5841,
"step": 7519
},
{
"epoch": 0.48,
"grad_norm": 0.8463053703308105,
"learning_rate": 5.6232051439754935e-06,
"loss": 0.5883,
"step": 7520
},
{
"epoch": 0.48,
"grad_norm": 0.8800484538078308,
"learning_rate": 5.622187137979843e-06,
"loss": 0.544,
"step": 7521
},
{
"epoch": 0.48,
"grad_norm": 0.8594365119934082,
"learning_rate": 5.621169105786057e-06,
"loss": 0.5719,
"step": 7522
},
{
"epoch": 0.48,
"grad_norm": 0.8680949807167053,
"learning_rate": 5.620151047437004e-06,
"loss": 0.6466,
"step": 7523
},
{
"epoch": 0.48,
"grad_norm": 0.8772831559181213,
"learning_rate": 5.619132962975544e-06,
"loss": 0.6038,
"step": 7524
},
{
"epoch": 0.48,
"grad_norm": 0.9212350845336914,
"learning_rate": 5.6181148524445506e-06,
"loss": 0.586,
"step": 7525
},
{
"epoch": 0.48,
"grad_norm": 0.9081183671951294,
"learning_rate": 5.617096715886889e-06,
"loss": 0.6006,
"step": 7526
},
{
"epoch": 0.48,
"grad_norm": 0.9265548586845398,
"learning_rate": 5.616078553345434e-06,
"loss": 0.6323,
"step": 7527
},
{
"epoch": 0.48,
"grad_norm": 0.8656793236732483,
"learning_rate": 5.615060364863053e-06,
"loss": 0.5746,
"step": 7528
},
{
"epoch": 0.48,
"grad_norm": 0.8917694091796875,
"learning_rate": 5.6140421504826205e-06,
"loss": 0.5804,
"step": 7529
},
{
"epoch": 0.48,
"grad_norm": 0.9244688153266907,
"learning_rate": 5.6130239102470075e-06,
"loss": 0.6397,
"step": 7530
},
{
"epoch": 0.48,
"grad_norm": 0.873084306716919,
"learning_rate": 5.612005644199092e-06,
"loss": 0.5719,
"step": 7531
},
{
"epoch": 0.48,
"grad_norm": 0.8609447479248047,
"learning_rate": 5.610987352381747e-06,
"loss": 0.5613,
"step": 7532
},
{
"epoch": 0.48,
"grad_norm": 0.9368327856063843,
"learning_rate": 5.60996903483785e-06,
"loss": 0.6308,
"step": 7533
},
{
"epoch": 0.48,
"grad_norm": 0.8809864521026611,
"learning_rate": 5.608950691610279e-06,
"loss": 0.5802,
"step": 7534
},
{
"epoch": 0.48,
"grad_norm": 0.926231324672699,
"learning_rate": 5.607932322741912e-06,
"loss": 0.6366,
"step": 7535
},
{
"epoch": 0.48,
"grad_norm": 0.9103180766105652,
"learning_rate": 5.60691392827563e-06,
"loss": 0.6085,
"step": 7536
},
{
"epoch": 0.48,
"grad_norm": 0.9746480584144592,
"learning_rate": 5.605895508254315e-06,
"loss": 0.5465,
"step": 7537
},
{
"epoch": 0.48,
"grad_norm": 0.9505468606948853,
"learning_rate": 5.604877062720848e-06,
"loss": 0.6671,
"step": 7538
},
{
"epoch": 0.48,
"grad_norm": 0.8591948747634888,
"learning_rate": 5.603858591718111e-06,
"loss": 0.5763,
"step": 7539
},
{
"epoch": 0.48,
"grad_norm": 0.9409388303756714,
"learning_rate": 5.602840095288989e-06,
"loss": 0.6302,
"step": 7540
},
{
"epoch": 0.48,
"grad_norm": 0.8928959965705872,
"learning_rate": 5.6018215734763685e-06,
"loss": 0.6324,
"step": 7541
},
{
"epoch": 0.48,
"grad_norm": 0.8751816749572754,
"learning_rate": 5.600803026323136e-06,
"loss": 0.5362,
"step": 7542
},
{
"epoch": 0.48,
"grad_norm": 0.9378029704093933,
"learning_rate": 5.599784453872177e-06,
"loss": 0.5904,
"step": 7543
},
{
"epoch": 0.48,
"grad_norm": 0.926987886428833,
"learning_rate": 5.5987658561663805e-06,
"loss": 0.6353,
"step": 7544
},
{
"epoch": 0.48,
"grad_norm": 0.8623868823051453,
"learning_rate": 5.597747233248637e-06,
"loss": 0.5966,
"step": 7545
},
{
"epoch": 0.48,
"grad_norm": 0.8656692504882812,
"learning_rate": 5.596728585161838e-06,
"loss": 0.5803,
"step": 7546
},
{
"epoch": 0.48,
"grad_norm": 0.8522694110870361,
"learning_rate": 5.595709911948873e-06,
"loss": 0.6405,
"step": 7547
},
{
"epoch": 0.48,
"grad_norm": 0.8766559958457947,
"learning_rate": 5.5946912136526365e-06,
"loss": 0.6045,
"step": 7548
},
{
"epoch": 0.48,
"grad_norm": 0.8855379223823547,
"learning_rate": 5.593672490316022e-06,
"loss": 0.6335,
"step": 7549
},
{
"epoch": 0.48,
"grad_norm": 0.8828189969062805,
"learning_rate": 5.5926537419819234e-06,
"loss": 0.6326,
"step": 7550
},
{
"epoch": 0.48,
"grad_norm": 0.8517670035362244,
"learning_rate": 5.591634968693238e-06,
"loss": 0.6034,
"step": 7551
},
{
"epoch": 0.48,
"grad_norm": 0.8935458064079285,
"learning_rate": 5.590616170492862e-06,
"loss": 0.628,
"step": 7552
},
{
"epoch": 0.48,
"grad_norm": 0.9340348839759827,
"learning_rate": 5.589597347423696e-06,
"loss": 0.6073,
"step": 7553
},
{
"epoch": 0.48,
"grad_norm": 0.8754671812057495,
"learning_rate": 5.588578499528633e-06,
"loss": 0.584,
"step": 7554
},
{
"epoch": 0.48,
"grad_norm": 0.8489634990692139,
"learning_rate": 5.587559626850578e-06,
"loss": 0.5701,
"step": 7555
},
{
"epoch": 0.48,
"grad_norm": 0.871929407119751,
"learning_rate": 5.586540729432431e-06,
"loss": 0.5916,
"step": 7556
},
{
"epoch": 0.48,
"grad_norm": 0.905117928981781,
"learning_rate": 5.585521807317097e-06,
"loss": 0.6647,
"step": 7557
},
{
"epoch": 0.48,
"grad_norm": 0.9271255731582642,
"learning_rate": 5.584502860547474e-06,
"loss": 0.6214,
"step": 7558
},
{
"epoch": 0.48,
"grad_norm": 0.8846172094345093,
"learning_rate": 5.5834838891664685e-06,
"loss": 0.6159,
"step": 7559
},
{
"epoch": 0.48,
"grad_norm": 0.8657467365264893,
"learning_rate": 5.582464893216987e-06,
"loss": 0.5704,
"step": 7560
},
{
"epoch": 0.48,
"grad_norm": 0.8502207398414612,
"learning_rate": 5.5814458727419365e-06,
"loss": 0.5878,
"step": 7561
},
{
"epoch": 0.48,
"grad_norm": 0.8335548639297485,
"learning_rate": 5.580426827784221e-06,
"loss": 0.612,
"step": 7562
},
{
"epoch": 0.48,
"grad_norm": 0.8756707310676575,
"learning_rate": 5.579407758386751e-06,
"loss": 0.6283,
"step": 7563
},
{
"epoch": 0.48,
"grad_norm": 0.9669787883758545,
"learning_rate": 5.578388664592435e-06,
"loss": 0.5999,
"step": 7564
},
{
"epoch": 0.48,
"grad_norm": 0.9120867848396301,
"learning_rate": 5.577369546444188e-06,
"loss": 0.6249,
"step": 7565
},
{
"epoch": 0.48,
"grad_norm": 0.9110515117645264,
"learning_rate": 5.576350403984915e-06,
"loss": 0.6314,
"step": 7566
},
{
"epoch": 0.48,
"grad_norm": 0.9408080577850342,
"learning_rate": 5.575331237257532e-06,
"loss": 0.5728,
"step": 7567
},
{
"epoch": 0.48,
"grad_norm": 0.9744350910186768,
"learning_rate": 5.574312046304954e-06,
"loss": 0.6502,
"step": 7568
},
{
"epoch": 0.48,
"grad_norm": 0.9024521112442017,
"learning_rate": 5.5732928311700906e-06,
"loss": 0.5861,
"step": 7569
},
{
"epoch": 0.48,
"grad_norm": 0.8772839903831482,
"learning_rate": 5.5722735918958614e-06,
"loss": 0.5825,
"step": 7570
},
{
"epoch": 0.48,
"grad_norm": 0.9152007699012756,
"learning_rate": 5.571254328525183e-06,
"loss": 0.5854,
"step": 7571
},
{
"epoch": 0.48,
"grad_norm": 0.9742832183837891,
"learning_rate": 5.570235041100972e-06,
"loss": 0.6213,
"step": 7572
},
{
"epoch": 0.48,
"grad_norm": 0.8699829578399658,
"learning_rate": 5.569215729666146e-06,
"loss": 0.5945,
"step": 7573
},
{
"epoch": 0.48,
"grad_norm": 0.8996490240097046,
"learning_rate": 5.568196394263626e-06,
"loss": 0.6015,
"step": 7574
},
{
"epoch": 0.48,
"grad_norm": 0.8309650421142578,
"learning_rate": 5.567177034936333e-06,
"loss": 0.5423,
"step": 7575
},
{
"epoch": 0.48,
"grad_norm": 0.884103000164032,
"learning_rate": 5.566157651727189e-06,
"loss": 0.6507,
"step": 7576
},
{
"epoch": 0.48,
"grad_norm": 0.8329902291297913,
"learning_rate": 5.5651382446791134e-06,
"loss": 0.5838,
"step": 7577
},
{
"epoch": 0.48,
"grad_norm": 0.8965960741043091,
"learning_rate": 5.564118813835033e-06,
"loss": 0.5781,
"step": 7578
},
{
"epoch": 0.48,
"grad_norm": 0.8552922010421753,
"learning_rate": 5.563099359237872e-06,
"loss": 0.5883,
"step": 7579
},
{
"epoch": 0.48,
"grad_norm": 0.8796671628952026,
"learning_rate": 5.5620798809305575e-06,
"loss": 0.6016,
"step": 7580
},
{
"epoch": 0.48,
"grad_norm": 0.9553985595703125,
"learning_rate": 5.561060378956014e-06,
"loss": 0.6185,
"step": 7581
},
{
"epoch": 0.48,
"grad_norm": 0.836025059223175,
"learning_rate": 5.560040853357168e-06,
"loss": 0.5931,
"step": 7582
},
{
"epoch": 0.48,
"grad_norm": 0.9648067355155945,
"learning_rate": 5.55902130417695e-06,
"loss": 0.6206,
"step": 7583
},
{
"epoch": 0.48,
"grad_norm": 0.8633977174758911,
"learning_rate": 5.558001731458293e-06,
"loss": 0.6114,
"step": 7584
},
{
"epoch": 0.48,
"grad_norm": 0.885905921459198,
"learning_rate": 5.556982135244121e-06,
"loss": 0.6113,
"step": 7585
},
{
"epoch": 0.48,
"grad_norm": 0.8822622299194336,
"learning_rate": 5.5559625155773685e-06,
"loss": 0.5788,
"step": 7586
},
{
"epoch": 0.48,
"grad_norm": 0.8463605046272278,
"learning_rate": 5.554942872500971e-06,
"loss": 0.5707,
"step": 7587
},
{
"epoch": 0.48,
"grad_norm": 0.8903326392173767,
"learning_rate": 5.5539232060578574e-06,
"loss": 0.6062,
"step": 7588
},
{
"epoch": 0.48,
"grad_norm": 0.8166199922561646,
"learning_rate": 5.552903516290966e-06,
"loss": 0.5806,
"step": 7589
},
{
"epoch": 0.48,
"grad_norm": 0.8574814200401306,
"learning_rate": 5.55188380324323e-06,
"loss": 0.586,
"step": 7590
},
{
"epoch": 0.48,
"grad_norm": 0.8767586946487427,
"learning_rate": 5.550864066957587e-06,
"loss": 0.6098,
"step": 7591
},
{
"epoch": 0.48,
"grad_norm": 0.8981362581253052,
"learning_rate": 5.549844307476975e-06,
"loss": 0.6253,
"step": 7592
},
{
"epoch": 0.48,
"grad_norm": 0.9011292457580566,
"learning_rate": 5.548824524844333e-06,
"loss": 0.6296,
"step": 7593
},
{
"epoch": 0.48,
"grad_norm": 0.8577702045440674,
"learning_rate": 5.547804719102596e-06,
"loss": 0.5661,
"step": 7594
},
{
"epoch": 0.48,
"grad_norm": 0.9309691190719604,
"learning_rate": 5.546784890294712e-06,
"loss": 0.646,
"step": 7595
},
{
"epoch": 0.48,
"grad_norm": 0.9005495309829712,
"learning_rate": 5.545765038463615e-06,
"loss": 0.634,
"step": 7596
},
{
"epoch": 0.48,
"grad_norm": 0.9504218697547913,
"learning_rate": 5.544745163652253e-06,
"loss": 0.5943,
"step": 7597
},
{
"epoch": 0.48,
"grad_norm": 0.880858302116394,
"learning_rate": 5.543725265903565e-06,
"loss": 0.5743,
"step": 7598
},
{
"epoch": 0.48,
"grad_norm": 0.9303281307220459,
"learning_rate": 5.5427053452605004e-06,
"loss": 0.6134,
"step": 7599
},
{
"epoch": 0.48,
"grad_norm": 0.8957832455635071,
"learning_rate": 5.541685401766001e-06,
"loss": 0.6142,
"step": 7600
},
{
"epoch": 0.48,
"grad_norm": 0.860815703868866,
"learning_rate": 5.540665435463013e-06,
"loss": 0.5398,
"step": 7601
},
{
"epoch": 0.48,
"grad_norm": 0.8271889090538025,
"learning_rate": 5.539645446394485e-06,
"loss": 0.6002,
"step": 7602
},
{
"epoch": 0.48,
"grad_norm": 0.8800649046897888,
"learning_rate": 5.538625434603363e-06,
"loss": 0.6247,
"step": 7603
},
{
"epoch": 0.48,
"grad_norm": 0.8922380208969116,
"learning_rate": 5.5376054001326e-06,
"loss": 0.6069,
"step": 7604
},
{
"epoch": 0.48,
"grad_norm": 0.8567295074462891,
"learning_rate": 5.53658534302514e-06,
"loss": 0.6585,
"step": 7605
},
{
"epoch": 0.48,
"grad_norm": 0.9114384651184082,
"learning_rate": 5.535565263323942e-06,
"loss": 0.5325,
"step": 7606
},
{
"epoch": 0.48,
"grad_norm": 0.8971738219261169,
"learning_rate": 5.534545161071951e-06,
"loss": 0.6266,
"step": 7607
},
{
"epoch": 0.48,
"grad_norm": 0.8661735653877258,
"learning_rate": 5.533525036312125e-06,
"loss": 0.5861,
"step": 7608
},
{
"epoch": 0.48,
"grad_norm": 0.9209964275360107,
"learning_rate": 5.532504889087413e-06,
"loss": 0.608,
"step": 7609
},
{
"epoch": 0.48,
"grad_norm": 0.9354657530784607,
"learning_rate": 5.531484719440776e-06,
"loss": 0.6116,
"step": 7610
},
{
"epoch": 0.48,
"grad_norm": 0.8302944898605347,
"learning_rate": 5.530464527415164e-06,
"loss": 0.5349,
"step": 7611
},
{
"epoch": 0.48,
"grad_norm": 0.9335947632789612,
"learning_rate": 5.529444313053538e-06,
"loss": 0.5976,
"step": 7612
},
{
"epoch": 0.48,
"grad_norm": 0.9276868104934692,
"learning_rate": 5.528424076398851e-06,
"loss": 0.6024,
"step": 7613
},
{
"epoch": 0.48,
"grad_norm": 0.8259304165840149,
"learning_rate": 5.527403817494067e-06,
"loss": 0.6018,
"step": 7614
},
{
"epoch": 0.48,
"grad_norm": 0.8607040643692017,
"learning_rate": 5.526383536382142e-06,
"loss": 0.5221,
"step": 7615
},
{
"epoch": 0.48,
"grad_norm": 0.8541266918182373,
"learning_rate": 5.525363233106037e-06,
"loss": 0.5861,
"step": 7616
},
{
"epoch": 0.48,
"grad_norm": 0.8392813801765442,
"learning_rate": 5.524342907708714e-06,
"loss": 0.6229,
"step": 7617
},
{
"epoch": 0.48,
"grad_norm": 0.8519023060798645,
"learning_rate": 5.5233225602331355e-06,
"loss": 0.5533,
"step": 7618
},
{
"epoch": 0.48,
"grad_norm": 0.9389131665229797,
"learning_rate": 5.522302190722264e-06,
"loss": 0.5698,
"step": 7619
},
{
"epoch": 0.48,
"grad_norm": 0.8677113056182861,
"learning_rate": 5.5212817992190644e-06,
"loss": 0.6011,
"step": 7620
},
{
"epoch": 0.48,
"grad_norm": 1.0327938795089722,
"learning_rate": 5.5202613857665025e-06,
"loss": 0.622,
"step": 7621
},
{
"epoch": 0.48,
"grad_norm": 0.8638737797737122,
"learning_rate": 5.5192409504075416e-06,
"loss": 0.5677,
"step": 7622
},
{
"epoch": 0.48,
"grad_norm": 0.9155073761940002,
"learning_rate": 5.518220493185153e-06,
"loss": 0.5872,
"step": 7623
},
{
"epoch": 0.48,
"grad_norm": 0.8531370162963867,
"learning_rate": 5.517200014142301e-06,
"loss": 0.5778,
"step": 7624
},
{
"epoch": 0.48,
"grad_norm": 0.865264356136322,
"learning_rate": 5.516179513321955e-06,
"loss": 0.5836,
"step": 7625
},
{
"epoch": 0.48,
"grad_norm": 0.8751364946365356,
"learning_rate": 5.5151589907670856e-06,
"loss": 0.572,
"step": 7626
},
{
"epoch": 0.48,
"grad_norm": 0.8969213962554932,
"learning_rate": 5.514138446520664e-06,
"loss": 0.6152,
"step": 7627
},
{
"epoch": 0.48,
"grad_norm": 0.8782392740249634,
"learning_rate": 5.51311788062566e-06,
"loss": 0.5778,
"step": 7628
},
{
"epoch": 0.48,
"grad_norm": 0.8104063868522644,
"learning_rate": 5.512097293125047e-06,
"loss": 0.5285,
"step": 7629
},
{
"epoch": 0.48,
"grad_norm": 0.8759908676147461,
"learning_rate": 5.511076684061799e-06,
"loss": 0.5613,
"step": 7630
},
{
"epoch": 0.48,
"grad_norm": 0.8901430368423462,
"learning_rate": 5.51005605347889e-06,
"loss": 0.6212,
"step": 7631
},
{
"epoch": 0.48,
"grad_norm": 0.8541998863220215,
"learning_rate": 5.509035401419296e-06,
"loss": 0.5491,
"step": 7632
},
{
"epoch": 0.48,
"grad_norm": 0.938401460647583,
"learning_rate": 5.50801472792599e-06,
"loss": 0.5846,
"step": 7633
},
{
"epoch": 0.48,
"grad_norm": 0.8890235424041748,
"learning_rate": 5.5069940330419525e-06,
"loss": 0.5504,
"step": 7634
},
{
"epoch": 0.48,
"grad_norm": 0.9128061532974243,
"learning_rate": 5.5059733168101596e-06,
"loss": 0.6344,
"step": 7635
},
{
"epoch": 0.48,
"grad_norm": 0.9124163389205933,
"learning_rate": 5.504952579273589e-06,
"loss": 0.5604,
"step": 7636
},
{
"epoch": 0.48,
"grad_norm": 0.9557987451553345,
"learning_rate": 5.503931820475223e-06,
"loss": 0.6476,
"step": 7637
},
{
"epoch": 0.48,
"grad_norm": 0.8558072447776794,
"learning_rate": 5.502911040458042e-06,
"loss": 0.6132,
"step": 7638
},
{
"epoch": 0.48,
"grad_norm": 0.8189815878868103,
"learning_rate": 5.501890239265025e-06,
"loss": 0.5687,
"step": 7639
},
{
"epoch": 0.48,
"grad_norm": 0.8753035664558411,
"learning_rate": 5.500869416939156e-06,
"loss": 0.6288,
"step": 7640
},
{
"epoch": 0.48,
"grad_norm": 0.8629709482192993,
"learning_rate": 5.49984857352342e-06,
"loss": 0.5766,
"step": 7641
},
{
"epoch": 0.48,
"grad_norm": 0.9087205529212952,
"learning_rate": 5.4988277090607986e-06,
"loss": 0.5434,
"step": 7642
},
{
"epoch": 0.48,
"grad_norm": 0.9986720085144043,
"learning_rate": 5.4978068235942775e-06,
"loss": 0.6495,
"step": 7643
},
{
"epoch": 0.48,
"grad_norm": 0.957332193851471,
"learning_rate": 5.496785917166843e-06,
"loss": 0.6054,
"step": 7644
},
{
"epoch": 0.48,
"grad_norm": 0.8836731910705566,
"learning_rate": 5.49576498982148e-06,
"loss": 0.5649,
"step": 7645
},
{
"epoch": 0.48,
"grad_norm": 0.8434025049209595,
"learning_rate": 5.49474404160118e-06,
"loss": 0.5556,
"step": 7646
},
{
"epoch": 0.48,
"grad_norm": 0.89605712890625,
"learning_rate": 5.4937230725489285e-06,
"loss": 0.6331,
"step": 7647
},
{
"epoch": 0.48,
"grad_norm": 0.8855474591255188,
"learning_rate": 5.492702082707716e-06,
"loss": 0.5908,
"step": 7648
},
{
"epoch": 0.48,
"grad_norm": 0.8697336316108704,
"learning_rate": 5.491681072120534e-06,
"loss": 0.6265,
"step": 7649
},
{
"epoch": 0.48,
"grad_norm": 0.9648655652999878,
"learning_rate": 5.4906600408303715e-06,
"loss": 0.6178,
"step": 7650
},
{
"epoch": 0.48,
"grad_norm": 0.901523768901825,
"learning_rate": 5.489638988880222e-06,
"loss": 0.562,
"step": 7651
},
{
"epoch": 0.48,
"grad_norm": 0.9213078618049622,
"learning_rate": 5.488617916313077e-06,
"loss": 0.5578,
"step": 7652
},
{
"epoch": 0.48,
"grad_norm": 0.8305292725563049,
"learning_rate": 5.487596823171932e-06,
"loss": 0.5244,
"step": 7653
},
{
"epoch": 0.48,
"grad_norm": 0.8835660219192505,
"learning_rate": 5.486575709499782e-06,
"loss": 0.6254,
"step": 7654
},
{
"epoch": 0.48,
"grad_norm": 0.9087215065956116,
"learning_rate": 5.48555457533962e-06,
"loss": 0.6071,
"step": 7655
},
{
"epoch": 0.49,
"grad_norm": 0.8418556451797485,
"learning_rate": 5.484533420734444e-06,
"loss": 0.5622,
"step": 7656
},
{
"epoch": 0.49,
"grad_norm": 0.8559536337852478,
"learning_rate": 5.483512245727252e-06,
"loss": 0.625,
"step": 7657
},
{
"epoch": 0.49,
"grad_norm": 0.8496268391609192,
"learning_rate": 5.482491050361041e-06,
"loss": 0.5712,
"step": 7658
},
{
"epoch": 0.49,
"grad_norm": 0.8907086253166199,
"learning_rate": 5.48146983467881e-06,
"loss": 0.618,
"step": 7659
},
{
"epoch": 0.49,
"grad_norm": 0.9585722088813782,
"learning_rate": 5.480448598723559e-06,
"loss": 0.6301,
"step": 7660
},
{
"epoch": 0.49,
"grad_norm": 0.9126084446907043,
"learning_rate": 5.47942734253829e-06,
"loss": 0.6009,
"step": 7661
},
{
"epoch": 0.49,
"grad_norm": 0.8798913955688477,
"learning_rate": 5.478406066166003e-06,
"loss": 0.5604,
"step": 7662
},
{
"epoch": 0.49,
"grad_norm": 0.8995177745819092,
"learning_rate": 5.477384769649701e-06,
"loss": 0.6143,
"step": 7663
},
{
"epoch": 0.49,
"grad_norm": 0.9048047661781311,
"learning_rate": 5.476363453032387e-06,
"loss": 0.5813,
"step": 7664
},
{
"epoch": 0.49,
"grad_norm": 0.808437168598175,
"learning_rate": 5.475342116357064e-06,
"loss": 0.6184,
"step": 7665
},
{
"epoch": 0.49,
"grad_norm": 0.9141887426376343,
"learning_rate": 5.474320759666739e-06,
"loss": 0.5828,
"step": 7666
},
{
"epoch": 0.49,
"grad_norm": 0.885321855545044,
"learning_rate": 5.473299383004417e-06,
"loss": 0.6431,
"step": 7667
},
{
"epoch": 0.49,
"grad_norm": 0.828567385673523,
"learning_rate": 5.472277986413104e-06,
"loss": 0.5438,
"step": 7668
},
{
"epoch": 0.49,
"grad_norm": 0.8383506536483765,
"learning_rate": 5.471256569935809e-06,
"loss": 0.5765,
"step": 7669
},
{
"epoch": 0.49,
"grad_norm": 0.8781369924545288,
"learning_rate": 5.470235133615538e-06,
"loss": 0.5844,
"step": 7670
},
{
"epoch": 0.49,
"grad_norm": 0.9070538282394409,
"learning_rate": 5.4692136774953004e-06,
"loss": 0.5773,
"step": 7671
},
{
"epoch": 0.49,
"grad_norm": 0.8789056539535522,
"learning_rate": 5.46819220161811e-06,
"loss": 0.5749,
"step": 7672
},
{
"epoch": 0.49,
"grad_norm": 0.8679722547531128,
"learning_rate": 5.467170706026973e-06,
"loss": 0.5518,
"step": 7673
},
{
"epoch": 0.49,
"grad_norm": 0.8629012107849121,
"learning_rate": 5.466149190764902e-06,
"loss": 0.5732,
"step": 7674
},
{
"epoch": 0.49,
"grad_norm": 0.8968449831008911,
"learning_rate": 5.465127655874911e-06,
"loss": 0.6644,
"step": 7675
},
{
"epoch": 0.49,
"grad_norm": 0.9048300385475159,
"learning_rate": 5.464106101400013e-06,
"loss": 0.5802,
"step": 7676
},
{
"epoch": 0.49,
"grad_norm": 0.8762057423591614,
"learning_rate": 5.463084527383222e-06,
"loss": 0.5703,
"step": 7677
},
{
"epoch": 0.49,
"grad_norm": 0.8548493385314941,
"learning_rate": 5.4620629338675505e-06,
"loss": 0.5622,
"step": 7678
},
{
"epoch": 0.49,
"grad_norm": 0.9109390377998352,
"learning_rate": 5.461041320896019e-06,
"loss": 0.6231,
"step": 7679
},
{
"epoch": 0.49,
"grad_norm": 0.8898347616195679,
"learning_rate": 5.460019688511639e-06,
"loss": 0.5709,
"step": 7680
},
{
"epoch": 0.49,
"grad_norm": 0.9044870138168335,
"learning_rate": 5.458998036757431e-06,
"loss": 0.6593,
"step": 7681
},
{
"epoch": 0.49,
"grad_norm": 0.8833417296409607,
"learning_rate": 5.4579763656764115e-06,
"loss": 0.6153,
"step": 7682
},
{
"epoch": 0.49,
"grad_norm": 0.9275777339935303,
"learning_rate": 5.456954675311602e-06,
"loss": 0.5869,
"step": 7683
},
{
"epoch": 0.49,
"grad_norm": 0.9542369246482849,
"learning_rate": 5.45593296570602e-06,
"loss": 0.6183,
"step": 7684
},
{
"epoch": 0.49,
"grad_norm": 0.9259552359580994,
"learning_rate": 5.454911236902687e-06,
"loss": 0.6372,
"step": 7685
},
{
"epoch": 0.49,
"grad_norm": 0.9347798824310303,
"learning_rate": 5.453889488944623e-06,
"loss": 0.5539,
"step": 7686
},
{
"epoch": 0.49,
"grad_norm": 0.9027972221374512,
"learning_rate": 5.452867721874854e-06,
"loss": 0.6572,
"step": 7687
},
{
"epoch": 0.49,
"grad_norm": 0.904701828956604,
"learning_rate": 5.4518459357364e-06,
"loss": 0.6378,
"step": 7688
},
{
"epoch": 0.49,
"grad_norm": 0.8021993637084961,
"learning_rate": 5.4508241305722856e-06,
"loss": 0.6006,
"step": 7689
},
{
"epoch": 0.49,
"grad_norm": 0.8704695701599121,
"learning_rate": 5.449802306425532e-06,
"loss": 0.5669,
"step": 7690
},
{
"epoch": 0.49,
"grad_norm": 0.8994425535202026,
"learning_rate": 5.448780463339172e-06,
"loss": 0.6242,
"step": 7691
},
{
"epoch": 0.49,
"grad_norm": 0.8955239653587341,
"learning_rate": 5.447758601356226e-06,
"loss": 0.6172,
"step": 7692
},
{
"epoch": 0.49,
"grad_norm": 0.8894906044006348,
"learning_rate": 5.446736720519725e-06,
"loss": 0.592,
"step": 7693
},
{
"epoch": 0.49,
"grad_norm": 0.9087505340576172,
"learning_rate": 5.445714820872693e-06,
"loss": 0.5644,
"step": 7694
},
{
"epoch": 0.49,
"grad_norm": 0.8230986595153809,
"learning_rate": 5.4446929024581606e-06,
"loss": 0.5527,
"step": 7695
},
{
"epoch": 0.49,
"grad_norm": 0.9331679344177246,
"learning_rate": 5.4436709653191575e-06,
"loss": 0.5922,
"step": 7696
},
{
"epoch": 0.49,
"grad_norm": 0.8441013097763062,
"learning_rate": 5.442649009498713e-06,
"loss": 0.5654,
"step": 7697
},
{
"epoch": 0.49,
"grad_norm": 0.9754238128662109,
"learning_rate": 5.441627035039859e-06,
"loss": 0.6082,
"step": 7698
},
{
"epoch": 0.49,
"grad_norm": 0.8756945133209229,
"learning_rate": 5.440605041985626e-06,
"loss": 0.5359,
"step": 7699
},
{
"epoch": 0.49,
"grad_norm": 0.7946870923042297,
"learning_rate": 5.439583030379049e-06,
"loss": 0.5282,
"step": 7700
},
{
"epoch": 0.49,
"grad_norm": 0.9179619550704956,
"learning_rate": 5.438561000263157e-06,
"loss": 0.5532,
"step": 7701
},
{
"epoch": 0.49,
"grad_norm": 0.8969404697418213,
"learning_rate": 5.4375389516809895e-06,
"loss": 0.6416,
"step": 7702
},
{
"epoch": 0.49,
"grad_norm": 0.9204484820365906,
"learning_rate": 5.436516884675579e-06,
"loss": 0.6281,
"step": 7703
},
{
"epoch": 0.49,
"grad_norm": 0.9213035702705383,
"learning_rate": 5.43549479928996e-06,
"loss": 0.5883,
"step": 7704
},
{
"epoch": 0.49,
"grad_norm": 0.8417788743972778,
"learning_rate": 5.434472695567169e-06,
"loss": 0.5565,
"step": 7705
},
{
"epoch": 0.49,
"grad_norm": 0.8737784028053284,
"learning_rate": 5.433450573550246e-06,
"loss": 0.5677,
"step": 7706
},
{
"epoch": 0.49,
"grad_norm": 0.8927813768386841,
"learning_rate": 5.432428433282226e-06,
"loss": 0.5841,
"step": 7707
},
{
"epoch": 0.49,
"grad_norm": 0.9724695086479187,
"learning_rate": 5.43140627480615e-06,
"loss": 0.6503,
"step": 7708
},
{
"epoch": 0.49,
"grad_norm": 0.854300856590271,
"learning_rate": 5.4303840981650565e-06,
"loss": 0.5347,
"step": 7709
},
{
"epoch": 0.49,
"grad_norm": 0.8913581371307373,
"learning_rate": 5.429361903401985e-06,
"loss": 0.6083,
"step": 7710
},
{
"epoch": 0.49,
"grad_norm": 0.8786452412605286,
"learning_rate": 5.4283396905599785e-06,
"loss": 0.5958,
"step": 7711
},
{
"epoch": 0.49,
"grad_norm": 0.9056409001350403,
"learning_rate": 5.427317459682076e-06,
"loss": 0.6015,
"step": 7712
},
{
"epoch": 0.49,
"grad_norm": 0.8648980855941772,
"learning_rate": 5.426295210811323e-06,
"loss": 0.5982,
"step": 7713
},
{
"epoch": 0.49,
"grad_norm": 0.8439405560493469,
"learning_rate": 5.425272943990761e-06,
"loss": 0.5646,
"step": 7714
},
{
"epoch": 0.49,
"grad_norm": 0.9676143527030945,
"learning_rate": 5.4242506592634354e-06,
"loss": 0.5852,
"step": 7715
},
{
"epoch": 0.49,
"grad_norm": 0.8673433661460876,
"learning_rate": 5.423228356672391e-06,
"loss": 0.5583,
"step": 7716
},
{
"epoch": 0.49,
"grad_norm": 0.878349244594574,
"learning_rate": 5.422206036260671e-06,
"loss": 0.5877,
"step": 7717
},
{
"epoch": 0.49,
"grad_norm": 0.863304853439331,
"learning_rate": 5.421183698071325e-06,
"loss": 0.633,
"step": 7718
},
{
"epoch": 0.49,
"grad_norm": 0.8977344036102295,
"learning_rate": 5.420161342147399e-06,
"loss": 0.5786,
"step": 7719
},
{
"epoch": 0.49,
"grad_norm": 0.9075315594673157,
"learning_rate": 5.4191389685319395e-06,
"loss": 0.6107,
"step": 7720
},
{
"epoch": 0.49,
"grad_norm": 0.8574069738388062,
"learning_rate": 5.4181165772679955e-06,
"loss": 0.5664,
"step": 7721
},
{
"epoch": 0.49,
"grad_norm": 0.8526822328567505,
"learning_rate": 5.417094168398618e-06,
"loss": 0.5912,
"step": 7722
},
{
"epoch": 0.49,
"grad_norm": 0.8845213055610657,
"learning_rate": 5.416071741966856e-06,
"loss": 0.6056,
"step": 7723
},
{
"epoch": 0.49,
"grad_norm": 0.8884814977645874,
"learning_rate": 5.41504929801576e-06,
"loss": 0.6047,
"step": 7724
},
{
"epoch": 0.49,
"grad_norm": 0.8512783050537109,
"learning_rate": 5.414026836588382e-06,
"loss": 0.5831,
"step": 7725
},
{
"epoch": 0.49,
"grad_norm": 0.8551806807518005,
"learning_rate": 5.413004357727775e-06,
"loss": 0.5865,
"step": 7726
},
{
"epoch": 0.49,
"grad_norm": 0.9021192789077759,
"learning_rate": 5.411981861476991e-06,
"loss": 0.5534,
"step": 7727
},
{
"epoch": 0.49,
"grad_norm": 0.8763885498046875,
"learning_rate": 5.4109593478790825e-06,
"loss": 0.544,
"step": 7728
},
{
"epoch": 0.49,
"grad_norm": 0.9107353687286377,
"learning_rate": 5.409936816977106e-06,
"loss": 0.654,
"step": 7729
},
{
"epoch": 0.49,
"grad_norm": 0.8590700626373291,
"learning_rate": 5.408914268814117e-06,
"loss": 0.594,
"step": 7730
},
{
"epoch": 0.49,
"grad_norm": 0.883228063583374,
"learning_rate": 5.4078917034331705e-06,
"loss": 0.5545,
"step": 7731
},
{
"epoch": 0.49,
"grad_norm": 0.8624158501625061,
"learning_rate": 5.4068691208773225e-06,
"loss": 0.5184,
"step": 7732
},
{
"epoch": 0.49,
"grad_norm": 0.8744218945503235,
"learning_rate": 5.405846521189632e-06,
"loss": 0.6158,
"step": 7733
},
{
"epoch": 0.49,
"grad_norm": 0.9129903316497803,
"learning_rate": 5.404823904413157e-06,
"loss": 0.5886,
"step": 7734
},
{
"epoch": 0.49,
"grad_norm": 0.8881204128265381,
"learning_rate": 5.403801270590955e-06,
"loss": 0.5861,
"step": 7735
},
{
"epoch": 0.49,
"grad_norm": 0.938651442527771,
"learning_rate": 5.402778619766086e-06,
"loss": 0.6119,
"step": 7736
},
{
"epoch": 0.49,
"grad_norm": 0.8968808650970459,
"learning_rate": 5.40175595198161e-06,
"loss": 0.6051,
"step": 7737
},
{
"epoch": 0.49,
"grad_norm": 0.8762619495391846,
"learning_rate": 5.400733267280589e-06,
"loss": 0.5904,
"step": 7738
},
{
"epoch": 0.49,
"grad_norm": 0.863160252571106,
"learning_rate": 5.399710565706084e-06,
"loss": 0.6209,
"step": 7739
},
{
"epoch": 0.49,
"grad_norm": 0.8458674550056458,
"learning_rate": 5.3986878473011585e-06,
"loss": 0.5892,
"step": 7740
},
{
"epoch": 0.49,
"grad_norm": 0.8740219473838806,
"learning_rate": 5.397665112108874e-06,
"loss": 0.5339,
"step": 7741
},
{
"epoch": 0.49,
"grad_norm": 0.8719754815101624,
"learning_rate": 5.3966423601722955e-06,
"loss": 0.5679,
"step": 7742
},
{
"epoch": 0.49,
"grad_norm": 1.0123850107192993,
"learning_rate": 5.3956195915344855e-06,
"loss": 0.6039,
"step": 7743
},
{
"epoch": 0.49,
"grad_norm": 0.9461089372634888,
"learning_rate": 5.394596806238511e-06,
"loss": 0.6337,
"step": 7744
},
{
"epoch": 0.49,
"grad_norm": 0.8724687695503235,
"learning_rate": 5.39357400432744e-06,
"loss": 0.524,
"step": 7745
},
{
"epoch": 0.49,
"grad_norm": 0.8834648728370667,
"learning_rate": 5.392551185844334e-06,
"loss": 0.6,
"step": 7746
},
{
"epoch": 0.49,
"grad_norm": 0.8506259322166443,
"learning_rate": 5.391528350832265e-06,
"loss": 0.5867,
"step": 7747
},
{
"epoch": 0.49,
"grad_norm": 0.9510423541069031,
"learning_rate": 5.3905054993342985e-06,
"loss": 0.6107,
"step": 7748
},
{
"epoch": 0.49,
"grad_norm": 0.8999704122543335,
"learning_rate": 5.389482631393504e-06,
"loss": 0.6091,
"step": 7749
},
{
"epoch": 0.49,
"grad_norm": 0.8855018019676208,
"learning_rate": 5.388459747052951e-06,
"loss": 0.5748,
"step": 7750
},
{
"epoch": 0.49,
"grad_norm": 0.8717223405838013,
"learning_rate": 5.387436846355709e-06,
"loss": 0.5826,
"step": 7751
},
{
"epoch": 0.49,
"grad_norm": 0.8774482607841492,
"learning_rate": 5.386413929344849e-06,
"loss": 0.5812,
"step": 7752
},
{
"epoch": 0.49,
"grad_norm": 0.96014004945755,
"learning_rate": 5.3853909960634446e-06,
"loss": 0.5428,
"step": 7753
},
{
"epoch": 0.49,
"grad_norm": 0.8995871543884277,
"learning_rate": 5.3843680465545635e-06,
"loss": 0.6025,
"step": 7754
},
{
"epoch": 0.49,
"grad_norm": 0.9755268096923828,
"learning_rate": 5.3833450808612816e-06,
"loss": 0.6194,
"step": 7755
},
{
"epoch": 0.49,
"grad_norm": 0.8455917835235596,
"learning_rate": 5.382322099026673e-06,
"loss": 0.5979,
"step": 7756
},
{
"epoch": 0.49,
"grad_norm": 0.85719895362854,
"learning_rate": 5.38129910109381e-06,
"loss": 0.5648,
"step": 7757
},
{
"epoch": 0.49,
"grad_norm": 0.8735889196395874,
"learning_rate": 5.380276087105769e-06,
"loss": 0.5679,
"step": 7758
},
{
"epoch": 0.49,
"grad_norm": 0.8770782351493835,
"learning_rate": 5.379253057105623e-06,
"loss": 0.5705,
"step": 7759
},
{
"epoch": 0.49,
"grad_norm": 0.8773306012153625,
"learning_rate": 5.378230011136453e-06,
"loss": 0.6444,
"step": 7760
},
{
"epoch": 0.49,
"grad_norm": 0.8813656568527222,
"learning_rate": 5.37720694924133e-06,
"loss": 0.5294,
"step": 7761
},
{
"epoch": 0.49,
"grad_norm": 0.8682379126548767,
"learning_rate": 5.376183871463336e-06,
"loss": 0.5526,
"step": 7762
},
{
"epoch": 0.49,
"grad_norm": 0.881206214427948,
"learning_rate": 5.375160777845548e-06,
"loss": 0.587,
"step": 7763
},
{
"epoch": 0.49,
"grad_norm": 0.8982335329055786,
"learning_rate": 5.3741376684310455e-06,
"loss": 0.5905,
"step": 7764
},
{
"epoch": 0.49,
"grad_norm": 0.9069334864616394,
"learning_rate": 5.3731145432629065e-06,
"loss": 0.6072,
"step": 7765
},
{
"epoch": 0.49,
"grad_norm": 0.900351881980896,
"learning_rate": 5.3720914023842105e-06,
"loss": 0.5809,
"step": 7766
},
{
"epoch": 0.49,
"grad_norm": 0.8970122337341309,
"learning_rate": 5.371068245838042e-06,
"loss": 0.6318,
"step": 7767
},
{
"epoch": 0.49,
"grad_norm": 0.854917049407959,
"learning_rate": 5.37004507366748e-06,
"loss": 0.595,
"step": 7768
},
{
"epoch": 0.49,
"grad_norm": 0.904750645160675,
"learning_rate": 5.369021885915607e-06,
"loss": 0.6008,
"step": 7769
},
{
"epoch": 0.49,
"grad_norm": 0.9690344333648682,
"learning_rate": 5.367998682625506e-06,
"loss": 0.6083,
"step": 7770
},
{
"epoch": 0.49,
"grad_norm": 0.829422116279602,
"learning_rate": 5.366975463840262e-06,
"loss": 0.5196,
"step": 7771
},
{
"epoch": 0.49,
"grad_norm": 0.8954147696495056,
"learning_rate": 5.365952229602956e-06,
"loss": 0.5388,
"step": 7772
},
{
"epoch": 0.49,
"grad_norm": 0.8637533187866211,
"learning_rate": 5.3649289799566766e-06,
"loss": 0.6015,
"step": 7773
},
{
"epoch": 0.49,
"grad_norm": 0.9479194283485413,
"learning_rate": 5.363905714944505e-06,
"loss": 0.5729,
"step": 7774
},
{
"epoch": 0.49,
"grad_norm": 0.9531112313270569,
"learning_rate": 5.362882434609531e-06,
"loss": 0.6434,
"step": 7775
},
{
"epoch": 0.49,
"grad_norm": 0.8943924307823181,
"learning_rate": 5.36185913899484e-06,
"loss": 0.576,
"step": 7776
},
{
"epoch": 0.49,
"grad_norm": 0.9110773801803589,
"learning_rate": 5.36083582814352e-06,
"loss": 0.6053,
"step": 7777
},
{
"epoch": 0.49,
"grad_norm": 0.8563951849937439,
"learning_rate": 5.359812502098657e-06,
"loss": 0.5905,
"step": 7778
},
{
"epoch": 0.49,
"grad_norm": 0.8632087707519531,
"learning_rate": 5.358789160903343e-06,
"loss": 0.5819,
"step": 7779
},
{
"epoch": 0.49,
"grad_norm": 0.8864629864692688,
"learning_rate": 5.357765804600664e-06,
"loss": 0.5689,
"step": 7780
},
{
"epoch": 0.49,
"grad_norm": 0.9528976678848267,
"learning_rate": 5.3567424332337125e-06,
"loss": 0.5989,
"step": 7781
},
{
"epoch": 0.49,
"grad_norm": 0.92073655128479,
"learning_rate": 5.355719046845577e-06,
"loss": 0.6029,
"step": 7782
},
{
"epoch": 0.49,
"grad_norm": 0.8833118677139282,
"learning_rate": 5.354695645479352e-06,
"loss": 0.5792,
"step": 7783
},
{
"epoch": 0.49,
"grad_norm": 0.8913655877113342,
"learning_rate": 5.353672229178125e-06,
"loss": 0.564,
"step": 7784
},
{
"epoch": 0.49,
"grad_norm": 0.8653914332389832,
"learning_rate": 5.352648797984993e-06,
"loss": 0.5835,
"step": 7785
},
{
"epoch": 0.49,
"grad_norm": 0.8871545791625977,
"learning_rate": 5.351625351943044e-06,
"loss": 0.6209,
"step": 7786
},
{
"epoch": 0.49,
"grad_norm": 0.888781726360321,
"learning_rate": 5.350601891095377e-06,
"loss": 0.5891,
"step": 7787
},
{
"epoch": 0.49,
"grad_norm": 0.7901937961578369,
"learning_rate": 5.349578415485085e-06,
"loss": 0.4945,
"step": 7788
},
{
"epoch": 0.49,
"grad_norm": 0.9174894094467163,
"learning_rate": 5.34855492515526e-06,
"loss": 0.5817,
"step": 7789
},
{
"epoch": 0.49,
"grad_norm": 0.9078687429428101,
"learning_rate": 5.347531420148999e-06,
"loss": 0.6333,
"step": 7790
},
{
"epoch": 0.49,
"grad_norm": 0.8857147097587585,
"learning_rate": 5.3465079005094e-06,
"loss": 0.5257,
"step": 7791
},
{
"epoch": 0.49,
"grad_norm": 0.8582876920700073,
"learning_rate": 5.34548436627956e-06,
"loss": 0.6155,
"step": 7792
},
{
"epoch": 0.49,
"grad_norm": 0.9454988837242126,
"learning_rate": 5.344460817502573e-06,
"loss": 0.6312,
"step": 7793
},
{
"epoch": 0.49,
"grad_norm": 0.8494389057159424,
"learning_rate": 5.34343725422154e-06,
"loss": 0.5916,
"step": 7794
},
{
"epoch": 0.49,
"grad_norm": 0.8725386261940002,
"learning_rate": 5.342413676479559e-06,
"loss": 0.5982,
"step": 7795
},
{
"epoch": 0.49,
"grad_norm": 0.890536904335022,
"learning_rate": 5.34139008431973e-06,
"loss": 0.5657,
"step": 7796
},
{
"epoch": 0.49,
"grad_norm": 0.8989502787590027,
"learning_rate": 5.34036647778515e-06,
"loss": 0.6311,
"step": 7797
},
{
"epoch": 0.49,
"grad_norm": 0.8554948568344116,
"learning_rate": 5.3393428569189235e-06,
"loss": 0.5428,
"step": 7798
},
{
"epoch": 0.49,
"grad_norm": 0.8485680818557739,
"learning_rate": 5.338319221764149e-06,
"loss": 0.5813,
"step": 7799
},
{
"epoch": 0.49,
"grad_norm": 0.8714006543159485,
"learning_rate": 5.33729557236393e-06,
"loss": 0.6127,
"step": 7800
},
{
"epoch": 0.49,
"grad_norm": 0.8680577874183655,
"learning_rate": 5.336271908761367e-06,
"loss": 0.6045,
"step": 7801
},
{
"epoch": 0.49,
"grad_norm": 0.8042650818824768,
"learning_rate": 5.335248230999565e-06,
"loss": 0.5189,
"step": 7802
},
{
"epoch": 0.49,
"grad_norm": 0.9379438757896423,
"learning_rate": 5.334224539121625e-06,
"loss": 0.5976,
"step": 7803
},
{
"epoch": 0.49,
"grad_norm": 0.8921198844909668,
"learning_rate": 5.333200833170652e-06,
"loss": 0.5507,
"step": 7804
},
{
"epoch": 0.49,
"grad_norm": 0.8879731893539429,
"learning_rate": 5.332177113189751e-06,
"loss": 0.6268,
"step": 7805
},
{
"epoch": 0.49,
"grad_norm": 0.8605756759643555,
"learning_rate": 5.331153379222028e-06,
"loss": 0.6194,
"step": 7806
},
{
"epoch": 0.49,
"grad_norm": 0.9379689693450928,
"learning_rate": 5.330129631310589e-06,
"loss": 0.6294,
"step": 7807
},
{
"epoch": 0.49,
"grad_norm": 0.8884453177452087,
"learning_rate": 5.3291058694985385e-06,
"loss": 0.6167,
"step": 7808
},
{
"epoch": 0.49,
"grad_norm": 0.8566985726356506,
"learning_rate": 5.328082093828984e-06,
"loss": 0.6185,
"step": 7809
},
{
"epoch": 0.49,
"grad_norm": 0.8915068507194519,
"learning_rate": 5.327058304345035e-06,
"loss": 0.5716,
"step": 7810
},
{
"epoch": 0.49,
"grad_norm": 0.8683719635009766,
"learning_rate": 5.3260345010898e-06,
"loss": 0.5944,
"step": 7811
},
{
"epoch": 0.49,
"grad_norm": 0.9614280462265015,
"learning_rate": 5.325010684106384e-06,
"loss": 0.5791,
"step": 7812
},
{
"epoch": 0.49,
"grad_norm": 0.8619272708892822,
"learning_rate": 5.323986853437899e-06,
"loss": 0.5457,
"step": 7813
},
{
"epoch": 0.5,
"grad_norm": 0.9838071465492249,
"learning_rate": 5.322963009127454e-06,
"loss": 0.618,
"step": 7814
},
{
"epoch": 0.5,
"grad_norm": 0.8671537637710571,
"learning_rate": 5.321939151218163e-06,
"loss": 0.5641,
"step": 7815
},
{
"epoch": 0.5,
"grad_norm": 0.8439149856567383,
"learning_rate": 5.320915279753132e-06,
"loss": 0.592,
"step": 7816
},
{
"epoch": 0.5,
"grad_norm": 0.8067424297332764,
"learning_rate": 5.319891394775475e-06,
"loss": 0.5262,
"step": 7817
},
{
"epoch": 0.5,
"grad_norm": 0.979844331741333,
"learning_rate": 5.3188674963283064e-06,
"loss": 0.5845,
"step": 7818
},
{
"epoch": 0.5,
"grad_norm": 0.8906669020652771,
"learning_rate": 5.317843584454734e-06,
"loss": 0.5639,
"step": 7819
},
{
"epoch": 0.5,
"grad_norm": 0.9191656112670898,
"learning_rate": 5.316819659197875e-06,
"loss": 0.5816,
"step": 7820
},
{
"epoch": 0.5,
"grad_norm": 0.917048990726471,
"learning_rate": 5.315795720600842e-06,
"loss": 0.5991,
"step": 7821
},
{
"epoch": 0.5,
"grad_norm": 0.8632756471633911,
"learning_rate": 5.314771768706751e-06,
"loss": 0.5967,
"step": 7822
},
{
"epoch": 0.5,
"grad_norm": 0.9136775732040405,
"learning_rate": 5.313747803558714e-06,
"loss": 0.5945,
"step": 7823
},
{
"epoch": 0.5,
"grad_norm": 0.84110426902771,
"learning_rate": 5.312723825199849e-06,
"loss": 0.6218,
"step": 7824
},
{
"epoch": 0.5,
"grad_norm": 0.9239146113395691,
"learning_rate": 5.311699833673273e-06,
"loss": 0.6421,
"step": 7825
},
{
"epoch": 0.5,
"grad_norm": 0.9365952014923096,
"learning_rate": 5.310675829022101e-06,
"loss": 0.5668,
"step": 7826
},
{
"epoch": 0.5,
"grad_norm": 0.9483537673950195,
"learning_rate": 5.309651811289449e-06,
"loss": 0.5903,
"step": 7827
},
{
"epoch": 0.5,
"grad_norm": 0.9416823387145996,
"learning_rate": 5.308627780518437e-06,
"loss": 0.5688,
"step": 7828
},
{
"epoch": 0.5,
"grad_norm": 0.9334666728973389,
"learning_rate": 5.307603736752183e-06,
"loss": 0.5561,
"step": 7829
},
{
"epoch": 0.5,
"grad_norm": 0.8541433215141296,
"learning_rate": 5.306579680033807e-06,
"loss": 0.592,
"step": 7830
},
{
"epoch": 0.5,
"grad_norm": 0.9449893832206726,
"learning_rate": 5.305555610406425e-06,
"loss": 0.6002,
"step": 7831
},
{
"epoch": 0.5,
"grad_norm": 0.8886929154396057,
"learning_rate": 5.30453152791316e-06,
"loss": 0.6695,
"step": 7832
},
{
"epoch": 0.5,
"grad_norm": 0.8141634464263916,
"learning_rate": 5.303507432597134e-06,
"loss": 0.5946,
"step": 7833
},
{
"epoch": 0.5,
"grad_norm": 0.8616921901702881,
"learning_rate": 5.302483324501463e-06,
"loss": 0.6024,
"step": 7834
},
{
"epoch": 0.5,
"grad_norm": 0.8720713257789612,
"learning_rate": 5.3014592036692715e-06,
"loss": 0.5934,
"step": 7835
},
{
"epoch": 0.5,
"grad_norm": 0.954289436340332,
"learning_rate": 5.300435070143683e-06,
"loss": 0.5998,
"step": 7836
},
{
"epoch": 0.5,
"grad_norm": 0.8757979273796082,
"learning_rate": 5.2994109239678185e-06,
"loss": 0.6295,
"step": 7837
},
{
"epoch": 0.5,
"grad_norm": 0.9314550161361694,
"learning_rate": 5.298386765184801e-06,
"loss": 0.6031,
"step": 7838
},
{
"epoch": 0.5,
"grad_norm": 0.8398404121398926,
"learning_rate": 5.297362593837755e-06,
"loss": 0.5565,
"step": 7839
},
{
"epoch": 0.5,
"grad_norm": 0.8812541365623474,
"learning_rate": 5.296338409969805e-06,
"loss": 0.5657,
"step": 7840
},
{
"epoch": 0.5,
"grad_norm": 0.8774970173835754,
"learning_rate": 5.295314213624076e-06,
"loss": 0.5786,
"step": 7841
},
{
"epoch": 0.5,
"grad_norm": 0.8290955424308777,
"learning_rate": 5.2942900048436914e-06,
"loss": 0.5563,
"step": 7842
},
{
"epoch": 0.5,
"grad_norm": 0.9258725047111511,
"learning_rate": 5.293265783671778e-06,
"loss": 0.5809,
"step": 7843
},
{
"epoch": 0.5,
"grad_norm": 0.8245546817779541,
"learning_rate": 5.292241550151465e-06,
"loss": 0.6013,
"step": 7844
},
{
"epoch": 0.5,
"grad_norm": 0.9611520767211914,
"learning_rate": 5.291217304325875e-06,
"loss": 0.6349,
"step": 7845
},
{
"epoch": 0.5,
"grad_norm": 0.89043790102005,
"learning_rate": 5.290193046238139e-06,
"loss": 0.6121,
"step": 7846
},
{
"epoch": 0.5,
"grad_norm": 0.9023299217224121,
"learning_rate": 5.289168775931381e-06,
"loss": 0.5966,
"step": 7847
},
{
"epoch": 0.5,
"grad_norm": 0.881334125995636,
"learning_rate": 5.288144493448733e-06,
"loss": 0.5643,
"step": 7848
},
{
"epoch": 0.5,
"grad_norm": 0.9082907438278198,
"learning_rate": 5.287120198833324e-06,
"loss": 0.5767,
"step": 7849
},
{
"epoch": 0.5,
"grad_norm": 0.9419313073158264,
"learning_rate": 5.286095892128282e-06,
"loss": 0.6172,
"step": 7850
},
{
"epoch": 0.5,
"grad_norm": 0.9367068409919739,
"learning_rate": 5.285071573376735e-06,
"loss": 0.6101,
"step": 7851
},
{
"epoch": 0.5,
"grad_norm": 0.9079290628433228,
"learning_rate": 5.2840472426218185e-06,
"loss": 0.589,
"step": 7852
},
{
"epoch": 0.5,
"grad_norm": 0.948851466178894,
"learning_rate": 5.283022899906659e-06,
"loss": 0.6335,
"step": 7853
},
{
"epoch": 0.5,
"grad_norm": 0.921149492263794,
"learning_rate": 5.28199854527439e-06,
"loss": 0.5849,
"step": 7854
},
{
"epoch": 0.5,
"grad_norm": 0.8445439338684082,
"learning_rate": 5.280974178768144e-06,
"loss": 0.54,
"step": 7855
},
{
"epoch": 0.5,
"grad_norm": 0.9971843361854553,
"learning_rate": 5.279949800431052e-06,
"loss": 0.6041,
"step": 7856
},
{
"epoch": 0.5,
"grad_norm": 0.8367643356323242,
"learning_rate": 5.278925410306248e-06,
"loss": 0.5955,
"step": 7857
},
{
"epoch": 0.5,
"grad_norm": 0.8564116358757019,
"learning_rate": 5.277901008436865e-06,
"loss": 0.5757,
"step": 7858
},
{
"epoch": 0.5,
"grad_norm": 0.8856030702590942,
"learning_rate": 5.276876594866037e-06,
"loss": 0.5963,
"step": 7859
},
{
"epoch": 0.5,
"grad_norm": 0.8912267088890076,
"learning_rate": 5.2758521696369e-06,
"loss": 0.5597,
"step": 7860
},
{
"epoch": 0.5,
"grad_norm": 0.8481583595275879,
"learning_rate": 5.274827732792587e-06,
"loss": 0.5526,
"step": 7861
},
{
"epoch": 0.5,
"grad_norm": 0.9108606576919556,
"learning_rate": 5.273803284376234e-06,
"loss": 0.6079,
"step": 7862
},
{
"epoch": 0.5,
"grad_norm": 0.9559755921363831,
"learning_rate": 5.272778824430977e-06,
"loss": 0.6008,
"step": 7863
},
{
"epoch": 0.5,
"grad_norm": 0.8783113360404968,
"learning_rate": 5.271754352999953e-06,
"loss": 0.6102,
"step": 7864
},
{
"epoch": 0.5,
"grad_norm": 0.8379794359207153,
"learning_rate": 5.2707298701263e-06,
"loss": 0.5744,
"step": 7865
},
{
"epoch": 0.5,
"grad_norm": 0.8685166835784912,
"learning_rate": 5.269705375853151e-06,
"loss": 0.604,
"step": 7866
},
{
"epoch": 0.5,
"grad_norm": 0.875748336315155,
"learning_rate": 5.26868087022365e-06,
"loss": 0.6116,
"step": 7867
},
{
"epoch": 0.5,
"grad_norm": 0.890408992767334,
"learning_rate": 5.26765635328093e-06,
"loss": 0.567,
"step": 7868
},
{
"epoch": 0.5,
"grad_norm": 0.9582130312919617,
"learning_rate": 5.266631825068134e-06,
"loss": 0.6553,
"step": 7869
},
{
"epoch": 0.5,
"grad_norm": 0.888396143913269,
"learning_rate": 5.265607285628397e-06,
"loss": 0.611,
"step": 7870
},
{
"epoch": 0.5,
"grad_norm": 0.869216799736023,
"learning_rate": 5.264582735004863e-06,
"loss": 0.5906,
"step": 7871
},
{
"epoch": 0.5,
"grad_norm": 0.8864418864250183,
"learning_rate": 5.26355817324067e-06,
"loss": 0.6488,
"step": 7872
},
{
"epoch": 0.5,
"grad_norm": 0.8367258906364441,
"learning_rate": 5.26253360037896e-06,
"loss": 0.6189,
"step": 7873
},
{
"epoch": 0.5,
"grad_norm": 0.8717927932739258,
"learning_rate": 5.2615090164628705e-06,
"loss": 0.6071,
"step": 7874
},
{
"epoch": 0.5,
"grad_norm": 0.8906144499778748,
"learning_rate": 5.2604844215355484e-06,
"loss": 0.5616,
"step": 7875
},
{
"epoch": 0.5,
"grad_norm": 0.9154402017593384,
"learning_rate": 5.259459815640133e-06,
"loss": 0.6081,
"step": 7876
},
{
"epoch": 0.5,
"grad_norm": 0.9059274792671204,
"learning_rate": 5.258435198819768e-06,
"loss": 0.6212,
"step": 7877
},
{
"epoch": 0.5,
"grad_norm": 0.9382339715957642,
"learning_rate": 5.257410571117594e-06,
"loss": 0.6418,
"step": 7878
},
{
"epoch": 0.5,
"grad_norm": 0.8434200882911682,
"learning_rate": 5.256385932576759e-06,
"loss": 0.5638,
"step": 7879
},
{
"epoch": 0.5,
"grad_norm": 0.8744908571243286,
"learning_rate": 5.255361283240402e-06,
"loss": 0.5436,
"step": 7880
},
{
"epoch": 0.5,
"grad_norm": 0.8957458138465881,
"learning_rate": 5.254336623151672e-06,
"loss": 0.6203,
"step": 7881
},
{
"epoch": 0.5,
"grad_norm": 0.8486526608467102,
"learning_rate": 5.253311952353708e-06,
"loss": 0.5835,
"step": 7882
},
{
"epoch": 0.5,
"grad_norm": 0.9576562643051147,
"learning_rate": 5.252287270889661e-06,
"loss": 0.6513,
"step": 7883
},
{
"epoch": 0.5,
"grad_norm": 0.85997474193573,
"learning_rate": 5.251262578802675e-06,
"loss": 0.5634,
"step": 7884
},
{
"epoch": 0.5,
"grad_norm": 0.87550950050354,
"learning_rate": 5.250237876135895e-06,
"loss": 0.6243,
"step": 7885
},
{
"epoch": 0.5,
"grad_norm": 0.8734540343284607,
"learning_rate": 5.2492131629324695e-06,
"loss": 0.5659,
"step": 7886
},
{
"epoch": 0.5,
"grad_norm": 0.8869773745536804,
"learning_rate": 5.248188439235544e-06,
"loss": 0.578,
"step": 7887
},
{
"epoch": 0.5,
"grad_norm": 0.8749696016311646,
"learning_rate": 5.247163705088267e-06,
"loss": 0.564,
"step": 7888
},
{
"epoch": 0.5,
"grad_norm": 0.8944323658943176,
"learning_rate": 5.246138960533786e-06,
"loss": 0.6297,
"step": 7889
},
{
"epoch": 0.5,
"grad_norm": 0.9447425603866577,
"learning_rate": 5.245114205615249e-06,
"loss": 0.5535,
"step": 7890
},
{
"epoch": 0.5,
"grad_norm": 0.8836696743965149,
"learning_rate": 5.244089440375807e-06,
"loss": 0.5838,
"step": 7891
},
{
"epoch": 0.5,
"grad_norm": 0.8536423444747925,
"learning_rate": 5.243064664858607e-06,
"loss": 0.5484,
"step": 7892
},
{
"epoch": 0.5,
"grad_norm": 0.9463775157928467,
"learning_rate": 5.242039879106799e-06,
"loss": 0.631,
"step": 7893
},
{
"epoch": 0.5,
"grad_norm": 0.9138554334640503,
"learning_rate": 5.241015083163534e-06,
"loss": 0.6952,
"step": 7894
},
{
"epoch": 0.5,
"grad_norm": 0.8552803993225098,
"learning_rate": 5.239990277071962e-06,
"loss": 0.625,
"step": 7895
},
{
"epoch": 0.5,
"grad_norm": 0.894889235496521,
"learning_rate": 5.238965460875236e-06,
"loss": 0.5667,
"step": 7896
},
{
"epoch": 0.5,
"grad_norm": 0.8741210699081421,
"learning_rate": 5.237940634616504e-06,
"loss": 0.5868,
"step": 7897
},
{
"epoch": 0.5,
"grad_norm": 0.8499166965484619,
"learning_rate": 5.2369157983389205e-06,
"loss": 0.6187,
"step": 7898
},
{
"epoch": 0.5,
"grad_norm": 0.9158671498298645,
"learning_rate": 5.235890952085637e-06,
"loss": 0.5634,
"step": 7899
},
{
"epoch": 0.5,
"grad_norm": 0.8855353593826294,
"learning_rate": 5.234866095899806e-06,
"loss": 0.5651,
"step": 7900
},
{
"epoch": 0.5,
"grad_norm": 0.9134857654571533,
"learning_rate": 5.23384122982458e-06,
"loss": 0.588,
"step": 7901
},
{
"epoch": 0.5,
"grad_norm": 0.9252248406410217,
"learning_rate": 5.232816353903113e-06,
"loss": 0.6017,
"step": 7902
},
{
"epoch": 0.5,
"grad_norm": 0.8008279800415039,
"learning_rate": 5.231791468178561e-06,
"loss": 0.5136,
"step": 7903
},
{
"epoch": 0.5,
"grad_norm": 0.8626922965049744,
"learning_rate": 5.230766572694075e-06,
"loss": 0.5724,
"step": 7904
},
{
"epoch": 0.5,
"grad_norm": 0.9324626326560974,
"learning_rate": 5.229741667492811e-06,
"loss": 0.6267,
"step": 7905
},
{
"epoch": 0.5,
"grad_norm": 0.8620643615722656,
"learning_rate": 5.228716752617926e-06,
"loss": 0.5924,
"step": 7906
},
{
"epoch": 0.5,
"grad_norm": 0.8927160501480103,
"learning_rate": 5.2276918281125744e-06,
"loss": 0.6103,
"step": 7907
},
{
"epoch": 0.5,
"grad_norm": 0.8659266233444214,
"learning_rate": 5.22666689401991e-06,
"loss": 0.5934,
"step": 7908
},
{
"epoch": 0.5,
"grad_norm": 0.8656795620918274,
"learning_rate": 5.225641950383094e-06,
"loss": 0.6328,
"step": 7909
},
{
"epoch": 0.5,
"grad_norm": 0.881079375743866,
"learning_rate": 5.2246169972452775e-06,
"loss": 0.6129,
"step": 7910
},
{
"epoch": 0.5,
"grad_norm": 0.8573868870735168,
"learning_rate": 5.223592034649624e-06,
"loss": 0.5608,
"step": 7911
},
{
"epoch": 0.5,
"grad_norm": 0.8543702960014343,
"learning_rate": 5.2225670626392845e-06,
"loss": 0.5469,
"step": 7912
},
{
"epoch": 0.5,
"grad_norm": 0.8963991403579712,
"learning_rate": 5.221542081257421e-06,
"loss": 0.6221,
"step": 7913
},
{
"epoch": 0.5,
"grad_norm": 0.9009084105491638,
"learning_rate": 5.220517090547194e-06,
"loss": 0.5719,
"step": 7914
},
{
"epoch": 0.5,
"grad_norm": 0.9578242301940918,
"learning_rate": 5.219492090551757e-06,
"loss": 0.6152,
"step": 7915
},
{
"epoch": 0.5,
"grad_norm": 0.9097537398338318,
"learning_rate": 5.21846708131427e-06,
"loss": 0.6263,
"step": 7916
},
{
"epoch": 0.5,
"grad_norm": 0.932669997215271,
"learning_rate": 5.217442062877897e-06,
"loss": 0.5925,
"step": 7917
},
{
"epoch": 0.5,
"grad_norm": 0.8461833000183105,
"learning_rate": 5.216417035285795e-06,
"loss": 0.6158,
"step": 7918
},
{
"epoch": 0.5,
"grad_norm": 0.9005031585693359,
"learning_rate": 5.215391998581123e-06,
"loss": 0.6002,
"step": 7919
},
{
"epoch": 0.5,
"grad_norm": 0.8439646363258362,
"learning_rate": 5.214366952807043e-06,
"loss": 0.5732,
"step": 7920
},
{
"epoch": 0.5,
"grad_norm": 0.839756190776825,
"learning_rate": 5.213341898006718e-06,
"loss": 0.5291,
"step": 7921
},
{
"epoch": 0.5,
"grad_norm": 0.8541595935821533,
"learning_rate": 5.212316834223307e-06,
"loss": 0.6199,
"step": 7922
},
{
"epoch": 0.5,
"grad_norm": 0.8544859886169434,
"learning_rate": 5.211291761499973e-06,
"loss": 0.558,
"step": 7923
},
{
"epoch": 0.5,
"grad_norm": 0.8676169514656067,
"learning_rate": 5.210266679879877e-06,
"loss": 0.5531,
"step": 7924
},
{
"epoch": 0.5,
"grad_norm": 0.9017534255981445,
"learning_rate": 5.209241589406183e-06,
"loss": 0.5912,
"step": 7925
},
{
"epoch": 0.5,
"grad_norm": 0.8735457062721252,
"learning_rate": 5.208216490122055e-06,
"loss": 0.5727,
"step": 7926
},
{
"epoch": 0.5,
"grad_norm": 0.9029328227043152,
"learning_rate": 5.207191382070653e-06,
"loss": 0.5819,
"step": 7927
},
{
"epoch": 0.5,
"grad_norm": 0.9156153202056885,
"learning_rate": 5.206166265295143e-06,
"loss": 0.5943,
"step": 7928
},
{
"epoch": 0.5,
"grad_norm": 0.8806928396224976,
"learning_rate": 5.205141139838691e-06,
"loss": 0.5618,
"step": 7929
},
{
"epoch": 0.5,
"grad_norm": 0.903069257736206,
"learning_rate": 5.204116005744456e-06,
"loss": 0.5822,
"step": 7930
},
{
"epoch": 0.5,
"grad_norm": 0.9287469983100891,
"learning_rate": 5.2030908630556075e-06,
"loss": 0.6082,
"step": 7931
},
{
"epoch": 0.5,
"grad_norm": 0.8750594258308411,
"learning_rate": 5.202065711815309e-06,
"loss": 0.5648,
"step": 7932
},
{
"epoch": 0.5,
"grad_norm": 0.8411305546760559,
"learning_rate": 5.201040552066727e-06,
"loss": 0.5076,
"step": 7933
},
{
"epoch": 0.5,
"grad_norm": 0.9401187896728516,
"learning_rate": 5.200015383853026e-06,
"loss": 0.5915,
"step": 7934
},
{
"epoch": 0.5,
"grad_norm": 0.8993878364562988,
"learning_rate": 5.1989902072173735e-06,
"loss": 0.6175,
"step": 7935
},
{
"epoch": 0.5,
"grad_norm": 0.9325996041297913,
"learning_rate": 5.197965022202935e-06,
"loss": 0.5977,
"step": 7936
},
{
"epoch": 0.5,
"grad_norm": 0.8501147627830505,
"learning_rate": 5.196939828852879e-06,
"loss": 0.5955,
"step": 7937
},
{
"epoch": 0.5,
"grad_norm": 0.8839433789253235,
"learning_rate": 5.195914627210372e-06,
"loss": 0.5685,
"step": 7938
},
{
"epoch": 0.5,
"grad_norm": 0.8878698945045471,
"learning_rate": 5.19488941731858e-06,
"loss": 0.6122,
"step": 7939
},
{
"epoch": 0.5,
"grad_norm": 0.8705379366874695,
"learning_rate": 5.193864199220674e-06,
"loss": 0.5531,
"step": 7940
},
{
"epoch": 0.5,
"grad_norm": 0.8977400064468384,
"learning_rate": 5.192838972959821e-06,
"loss": 0.6069,
"step": 7941
},
{
"epoch": 0.5,
"grad_norm": 0.8894720673561096,
"learning_rate": 5.19181373857919e-06,
"loss": 0.5976,
"step": 7942
},
{
"epoch": 0.5,
"grad_norm": 0.8529515862464905,
"learning_rate": 5.190788496121948e-06,
"loss": 0.5842,
"step": 7943
},
{
"epoch": 0.5,
"grad_norm": 0.8185912370681763,
"learning_rate": 5.189763245631268e-06,
"loss": 0.5169,
"step": 7944
},
{
"epoch": 0.5,
"grad_norm": 0.8798929452896118,
"learning_rate": 5.188737987150316e-06,
"loss": 0.6247,
"step": 7945
},
{
"epoch": 0.5,
"grad_norm": 0.8841909766197205,
"learning_rate": 5.1877127207222666e-06,
"loss": 0.5642,
"step": 7946
},
{
"epoch": 0.5,
"grad_norm": 0.8578714728355408,
"learning_rate": 5.186687446390284e-06,
"loss": 0.5656,
"step": 7947
},
{
"epoch": 0.5,
"grad_norm": 0.8991813659667969,
"learning_rate": 5.185662164197546e-06,
"loss": 0.5925,
"step": 7948
},
{
"epoch": 0.5,
"grad_norm": 0.8656896352767944,
"learning_rate": 5.184636874187218e-06,
"loss": 0.5762,
"step": 7949
},
{
"epoch": 0.5,
"grad_norm": 0.9480549097061157,
"learning_rate": 5.183611576402474e-06,
"loss": 0.5916,
"step": 7950
},
{
"epoch": 0.5,
"grad_norm": 0.8683533072471619,
"learning_rate": 5.182586270886485e-06,
"loss": 0.6007,
"step": 7951
},
{
"epoch": 0.5,
"grad_norm": 0.8761510848999023,
"learning_rate": 5.181560957682423e-06,
"loss": 0.5939,
"step": 7952
},
{
"epoch": 0.5,
"grad_norm": 0.8311535716056824,
"learning_rate": 5.180535636833462e-06,
"loss": 0.5621,
"step": 7953
},
{
"epoch": 0.5,
"grad_norm": 0.859836995601654,
"learning_rate": 5.179510308382773e-06,
"loss": 0.5844,
"step": 7954
},
{
"epoch": 0.5,
"grad_norm": 0.897769033908844,
"learning_rate": 5.178484972373528e-06,
"loss": 0.6163,
"step": 7955
},
{
"epoch": 0.5,
"grad_norm": 0.8741475343704224,
"learning_rate": 5.177459628848903e-06,
"loss": 0.6248,
"step": 7956
},
{
"epoch": 0.5,
"grad_norm": 0.8983214497566223,
"learning_rate": 5.17643427785207e-06,
"loss": 0.6236,
"step": 7957
},
{
"epoch": 0.5,
"grad_norm": 0.8339930772781372,
"learning_rate": 5.175408919426204e-06,
"loss": 0.5713,
"step": 7958
},
{
"epoch": 0.5,
"grad_norm": 0.890082061290741,
"learning_rate": 5.174383553614478e-06,
"loss": 0.5438,
"step": 7959
},
{
"epoch": 0.5,
"grad_norm": 0.8514465689659119,
"learning_rate": 5.1733581804600674e-06,
"loss": 0.5949,
"step": 7960
},
{
"epoch": 0.5,
"grad_norm": 0.9061854481697083,
"learning_rate": 5.172332800006147e-06,
"loss": 0.6432,
"step": 7961
},
{
"epoch": 0.5,
"grad_norm": 0.8398959636688232,
"learning_rate": 5.171307412295892e-06,
"loss": 0.5946,
"step": 7962
},
{
"epoch": 0.5,
"grad_norm": 0.8187358379364014,
"learning_rate": 5.1702820173724766e-06,
"loss": 0.5614,
"step": 7963
},
{
"epoch": 0.5,
"grad_norm": 0.9887537360191345,
"learning_rate": 5.169256615279078e-06,
"loss": 0.6059,
"step": 7964
},
{
"epoch": 0.5,
"grad_norm": 0.8645609617233276,
"learning_rate": 5.168231206058874e-06,
"loss": 0.5553,
"step": 7965
},
{
"epoch": 0.5,
"grad_norm": 0.8241131901741028,
"learning_rate": 5.167205789755037e-06,
"loss": 0.5472,
"step": 7966
},
{
"epoch": 0.5,
"grad_norm": 0.8981542587280273,
"learning_rate": 5.1661803664107465e-06,
"loss": 0.5675,
"step": 7967
},
{
"epoch": 0.5,
"grad_norm": 0.8625651001930237,
"learning_rate": 5.16515493606918e-06,
"loss": 0.5916,
"step": 7968
},
{
"epoch": 0.5,
"grad_norm": 0.8327503800392151,
"learning_rate": 5.164129498773513e-06,
"loss": 0.5837,
"step": 7969
},
{
"epoch": 0.5,
"grad_norm": 0.8808488845825195,
"learning_rate": 5.163104054566922e-06,
"loss": 0.6029,
"step": 7970
},
{
"epoch": 0.51,
"grad_norm": 0.8956292867660522,
"learning_rate": 5.16207860349259e-06,
"loss": 0.5893,
"step": 7971
},
{
"epoch": 0.51,
"grad_norm": 0.8336197137832642,
"learning_rate": 5.16105314559369e-06,
"loss": 0.5671,
"step": 7972
},
{
"epoch": 0.51,
"grad_norm": 0.8766692280769348,
"learning_rate": 5.160027680913402e-06,
"loss": 0.5806,
"step": 7973
},
{
"epoch": 0.51,
"grad_norm": 0.8673431873321533,
"learning_rate": 5.159002209494905e-06,
"loss": 0.6534,
"step": 7974
},
{
"epoch": 0.51,
"grad_norm": 0.8875123858451843,
"learning_rate": 5.157976731381379e-06,
"loss": 0.5969,
"step": 7975
},
{
"epoch": 0.51,
"grad_norm": 0.9223279356956482,
"learning_rate": 5.1569512466160025e-06,
"loss": 0.6288,
"step": 7976
},
{
"epoch": 0.51,
"grad_norm": 0.9694954752922058,
"learning_rate": 5.155925755241954e-06,
"loss": 0.6392,
"step": 7977
},
{
"epoch": 0.51,
"grad_norm": 0.9013630151748657,
"learning_rate": 5.1549002573024144e-06,
"loss": 0.5671,
"step": 7978
},
{
"epoch": 0.51,
"grad_norm": 0.8672821521759033,
"learning_rate": 5.153874752840564e-06,
"loss": 0.5583,
"step": 7979
},
{
"epoch": 0.51,
"grad_norm": 1.076423168182373,
"learning_rate": 5.152849241899585e-06,
"loss": 0.5713,
"step": 7980
},
{
"epoch": 0.51,
"grad_norm": 0.9117089509963989,
"learning_rate": 5.151823724522653e-06,
"loss": 0.5954,
"step": 7981
},
{
"epoch": 0.51,
"grad_norm": 0.8285648226737976,
"learning_rate": 5.150798200752953e-06,
"loss": 0.5856,
"step": 7982
},
{
"epoch": 0.51,
"grad_norm": 0.8754099607467651,
"learning_rate": 5.149772670633666e-06,
"loss": 0.5748,
"step": 7983
},
{
"epoch": 0.51,
"grad_norm": 0.8837385177612305,
"learning_rate": 5.148747134207974e-06,
"loss": 0.5615,
"step": 7984
},
{
"epoch": 0.51,
"grad_norm": 0.8902435302734375,
"learning_rate": 5.147721591519056e-06,
"loss": 0.5814,
"step": 7985
},
{
"epoch": 0.51,
"grad_norm": 0.8963085412979126,
"learning_rate": 5.146696042610095e-06,
"loss": 0.5477,
"step": 7986
},
{
"epoch": 0.51,
"grad_norm": 0.9228818416595459,
"learning_rate": 5.145670487524276e-06,
"loss": 0.6119,
"step": 7987
},
{
"epoch": 0.51,
"grad_norm": 0.9034307599067688,
"learning_rate": 5.144644926304778e-06,
"loss": 0.6482,
"step": 7988
},
{
"epoch": 0.51,
"grad_norm": 0.9602980017662048,
"learning_rate": 5.1436193589947855e-06,
"loss": 0.5889,
"step": 7989
},
{
"epoch": 0.51,
"grad_norm": 0.9697549939155579,
"learning_rate": 5.1425937856374816e-06,
"loss": 0.6406,
"step": 7990
},
{
"epoch": 0.51,
"grad_norm": 0.8972442746162415,
"learning_rate": 5.141568206276051e-06,
"loss": 0.6258,
"step": 7991
},
{
"epoch": 0.51,
"grad_norm": 0.8347691297531128,
"learning_rate": 5.140542620953675e-06,
"loss": 0.5349,
"step": 7992
},
{
"epoch": 0.51,
"grad_norm": 1.0211116075515747,
"learning_rate": 5.139517029713537e-06,
"loss": 0.665,
"step": 7993
},
{
"epoch": 0.51,
"grad_norm": 0.8696901202201843,
"learning_rate": 5.138491432598822e-06,
"loss": 0.5532,
"step": 7994
},
{
"epoch": 0.51,
"grad_norm": 0.9338617920875549,
"learning_rate": 5.137465829652716e-06,
"loss": 0.5866,
"step": 7995
},
{
"epoch": 0.51,
"grad_norm": 0.9527667760848999,
"learning_rate": 5.136440220918401e-06,
"loss": 0.5829,
"step": 7996
},
{
"epoch": 0.51,
"grad_norm": 0.9329034686088562,
"learning_rate": 5.135414606439063e-06,
"loss": 0.6293,
"step": 7997
},
{
"epoch": 0.51,
"grad_norm": 0.924534797668457,
"learning_rate": 5.134388986257887e-06,
"loss": 0.5911,
"step": 7998
},
{
"epoch": 0.51,
"grad_norm": 0.8370699286460876,
"learning_rate": 5.133363360418059e-06,
"loss": 0.6032,
"step": 7999
},
{
"epoch": 0.51,
"grad_norm": 0.8892449736595154,
"learning_rate": 5.132337728962763e-06,
"loss": 0.6089,
"step": 8000
},
{
"epoch": 0.51,
"grad_norm": 0.8967301249504089,
"learning_rate": 5.131312091935186e-06,
"loss": 0.5924,
"step": 8001
},
{
"epoch": 0.51,
"grad_norm": 0.8810504674911499,
"learning_rate": 5.130286449378513e-06,
"loss": 0.6515,
"step": 8002
},
{
"epoch": 0.51,
"grad_norm": 0.9304781556129456,
"learning_rate": 5.129260801335932e-06,
"loss": 0.6081,
"step": 8003
},
{
"epoch": 0.51,
"grad_norm": 0.8867761492729187,
"learning_rate": 5.128235147850629e-06,
"loss": 0.6011,
"step": 8004
},
{
"epoch": 0.51,
"grad_norm": 0.9013170003890991,
"learning_rate": 5.127209488965787e-06,
"loss": 0.5825,
"step": 8005
},
{
"epoch": 0.51,
"grad_norm": 0.8430556654930115,
"learning_rate": 5.1261838247246e-06,
"loss": 0.5425,
"step": 8006
},
{
"epoch": 0.51,
"grad_norm": 0.8869624733924866,
"learning_rate": 5.125158155170248e-06,
"loss": 0.5767,
"step": 8007
},
{
"epoch": 0.51,
"grad_norm": 0.9233295321464539,
"learning_rate": 5.124132480345922e-06,
"loss": 0.5623,
"step": 8008
},
{
"epoch": 0.51,
"grad_norm": 0.9272169470787048,
"learning_rate": 5.123106800294809e-06,
"loss": 0.596,
"step": 8009
},
{
"epoch": 0.51,
"grad_norm": 0.8874875903129578,
"learning_rate": 5.122081115060098e-06,
"loss": 0.5913,
"step": 8010
},
{
"epoch": 0.51,
"grad_norm": 0.8825517296791077,
"learning_rate": 5.121055424684975e-06,
"loss": 0.5532,
"step": 8011
},
{
"epoch": 0.51,
"grad_norm": 0.8856724500656128,
"learning_rate": 5.12002972921263e-06,
"loss": 0.5788,
"step": 8012
},
{
"epoch": 0.51,
"grad_norm": 0.9288915395736694,
"learning_rate": 5.119004028686249e-06,
"loss": 0.5705,
"step": 8013
},
{
"epoch": 0.51,
"grad_norm": 0.897471010684967,
"learning_rate": 5.117978323149025e-06,
"loss": 0.6277,
"step": 8014
},
{
"epoch": 0.51,
"grad_norm": 0.8995818495750427,
"learning_rate": 5.116952612644141e-06,
"loss": 0.5288,
"step": 8015
},
{
"epoch": 0.51,
"grad_norm": 0.9045858979225159,
"learning_rate": 5.1159268972147915e-06,
"loss": 0.6051,
"step": 8016
},
{
"epoch": 0.51,
"grad_norm": 0.913692057132721,
"learning_rate": 5.114901176904164e-06,
"loss": 0.5748,
"step": 8017
},
{
"epoch": 0.51,
"grad_norm": 0.865149736404419,
"learning_rate": 5.113875451755447e-06,
"loss": 0.6055,
"step": 8018
},
{
"epoch": 0.51,
"grad_norm": 0.828730046749115,
"learning_rate": 5.11284972181183e-06,
"loss": 0.5581,
"step": 8019
},
{
"epoch": 0.51,
"grad_norm": 0.8808106184005737,
"learning_rate": 5.111823987116504e-06,
"loss": 0.5795,
"step": 8020
},
{
"epoch": 0.51,
"grad_norm": 0.8963019847869873,
"learning_rate": 5.110798247712661e-06,
"loss": 0.5901,
"step": 8021
},
{
"epoch": 0.51,
"grad_norm": 0.9240871667861938,
"learning_rate": 5.109772503643486e-06,
"loss": 0.6433,
"step": 8022
},
{
"epoch": 0.51,
"grad_norm": 0.8749609589576721,
"learning_rate": 5.108746754952177e-06,
"loss": 0.5391,
"step": 8023
},
{
"epoch": 0.51,
"grad_norm": 0.906970202922821,
"learning_rate": 5.107721001681915e-06,
"loss": 0.6189,
"step": 8024
},
{
"epoch": 0.51,
"grad_norm": 0.8912851214408875,
"learning_rate": 5.1066952438759e-06,
"loss": 0.5633,
"step": 8025
},
{
"epoch": 0.51,
"grad_norm": 0.8463259339332581,
"learning_rate": 5.105669481577319e-06,
"loss": 0.6057,
"step": 8026
},
{
"epoch": 0.51,
"grad_norm": 0.8847749829292297,
"learning_rate": 5.104643714829362e-06,
"loss": 0.6348,
"step": 8027
},
{
"epoch": 0.51,
"grad_norm": 0.8036050796508789,
"learning_rate": 5.103617943675224e-06,
"loss": 0.586,
"step": 8028
},
{
"epoch": 0.51,
"grad_norm": 0.8839384913444519,
"learning_rate": 5.102592168158095e-06,
"loss": 0.5924,
"step": 8029
},
{
"epoch": 0.51,
"grad_norm": 0.9251484870910645,
"learning_rate": 5.101566388321165e-06,
"loss": 0.572,
"step": 8030
},
{
"epoch": 0.51,
"grad_norm": 0.8279865980148315,
"learning_rate": 5.100540604207629e-06,
"loss": 0.5797,
"step": 8031
},
{
"epoch": 0.51,
"grad_norm": 0.9217899441719055,
"learning_rate": 5.099514815860678e-06,
"loss": 0.5839,
"step": 8032
},
{
"epoch": 0.51,
"grad_norm": 0.903213381767273,
"learning_rate": 5.098489023323504e-06,
"loss": 0.5239,
"step": 8033
},
{
"epoch": 0.51,
"grad_norm": 0.8775154948234558,
"learning_rate": 5.0974632266393e-06,
"loss": 0.591,
"step": 8034
},
{
"epoch": 0.51,
"grad_norm": 0.8651240468025208,
"learning_rate": 5.0964374258512585e-06,
"loss": 0.5767,
"step": 8035
},
{
"epoch": 0.51,
"grad_norm": 0.975160539150238,
"learning_rate": 5.0954116210025725e-06,
"loss": 0.6185,
"step": 8036
},
{
"epoch": 0.51,
"grad_norm": 0.9247754812240601,
"learning_rate": 5.094385812136435e-06,
"loss": 0.5795,
"step": 8037
},
{
"epoch": 0.51,
"grad_norm": 0.8965883255004883,
"learning_rate": 5.09335999929604e-06,
"loss": 0.577,
"step": 8038
},
{
"epoch": 0.51,
"grad_norm": 0.8666002750396729,
"learning_rate": 5.092334182524578e-06,
"loss": 0.5766,
"step": 8039
},
{
"epoch": 0.51,
"grad_norm": 0.94881272315979,
"learning_rate": 5.091308361865247e-06,
"loss": 0.6627,
"step": 8040
},
{
"epoch": 0.51,
"grad_norm": 0.8409824371337891,
"learning_rate": 5.090282537361237e-06,
"loss": 0.5406,
"step": 8041
},
{
"epoch": 0.51,
"grad_norm": 0.9426827430725098,
"learning_rate": 5.089256709055745e-06,
"loss": 0.6425,
"step": 8042
},
{
"epoch": 0.51,
"grad_norm": 0.925849199295044,
"learning_rate": 5.088230876991962e-06,
"loss": 0.5865,
"step": 8043
},
{
"epoch": 0.51,
"grad_norm": 0.8730261325836182,
"learning_rate": 5.087205041213085e-06,
"loss": 0.6125,
"step": 8044
},
{
"epoch": 0.51,
"grad_norm": 0.9450942277908325,
"learning_rate": 5.086179201762306e-06,
"loss": 0.6118,
"step": 8045
},
{
"epoch": 0.51,
"grad_norm": 0.9059416055679321,
"learning_rate": 5.085153358682822e-06,
"loss": 0.5707,
"step": 8046
},
{
"epoch": 0.51,
"grad_norm": 0.867950975894928,
"learning_rate": 5.084127512017823e-06,
"loss": 0.5792,
"step": 8047
},
{
"epoch": 0.51,
"grad_norm": 0.8605546951293945,
"learning_rate": 5.083101661810511e-06,
"loss": 0.5895,
"step": 8048
},
{
"epoch": 0.51,
"grad_norm": 0.9312983155250549,
"learning_rate": 5.082075808104075e-06,
"loss": 0.5799,
"step": 8049
},
{
"epoch": 0.51,
"grad_norm": 0.8603020310401917,
"learning_rate": 5.081049950941713e-06,
"loss": 0.6026,
"step": 8050
},
{
"epoch": 0.51,
"grad_norm": 0.8669036626815796,
"learning_rate": 5.080024090366618e-06,
"loss": 0.6017,
"step": 8051
},
{
"epoch": 0.51,
"grad_norm": 0.9047536253929138,
"learning_rate": 5.078998226421989e-06,
"loss": 0.6222,
"step": 8052
},
{
"epoch": 0.51,
"grad_norm": 0.9225742816925049,
"learning_rate": 5.07797235915102e-06,
"loss": 0.6049,
"step": 8053
},
{
"epoch": 0.51,
"grad_norm": 0.9004045724868774,
"learning_rate": 5.076946488596905e-06,
"loss": 0.6042,
"step": 8054
},
{
"epoch": 0.51,
"grad_norm": 0.9334387183189392,
"learning_rate": 5.07592061480284e-06,
"loss": 0.6317,
"step": 8055
},
{
"epoch": 0.51,
"grad_norm": 0.890455424785614,
"learning_rate": 5.074894737812023e-06,
"loss": 0.5758,
"step": 8056
},
{
"epoch": 0.51,
"grad_norm": 0.8868134021759033,
"learning_rate": 5.07386885766765e-06,
"loss": 0.6435,
"step": 8057
},
{
"epoch": 0.51,
"grad_norm": 0.8172594904899597,
"learning_rate": 5.072842974412916e-06,
"loss": 0.5172,
"step": 8058
},
{
"epoch": 0.51,
"grad_norm": 0.8145936727523804,
"learning_rate": 5.071817088091017e-06,
"loss": 0.5327,
"step": 8059
},
{
"epoch": 0.51,
"grad_norm": 0.8804033994674683,
"learning_rate": 5.0707911987451496e-06,
"loss": 0.6371,
"step": 8060
},
{
"epoch": 0.51,
"grad_norm": 0.9473575353622437,
"learning_rate": 5.0697653064185125e-06,
"loss": 0.6165,
"step": 8061
},
{
"epoch": 0.51,
"grad_norm": 0.894706130027771,
"learning_rate": 5.068739411154301e-06,
"loss": 0.5932,
"step": 8062
},
{
"epoch": 0.51,
"grad_norm": 0.8926814198493958,
"learning_rate": 5.0677135129957115e-06,
"loss": 0.6044,
"step": 8063
},
{
"epoch": 0.51,
"grad_norm": 0.8846773505210876,
"learning_rate": 5.066687611985941e-06,
"loss": 0.5754,
"step": 8064
},
{
"epoch": 0.51,
"grad_norm": 0.8815335631370544,
"learning_rate": 5.065661708168188e-06,
"loss": 0.5586,
"step": 8065
},
{
"epoch": 0.51,
"grad_norm": 0.916784942150116,
"learning_rate": 5.064635801585649e-06,
"loss": 0.5575,
"step": 8066
},
{
"epoch": 0.51,
"grad_norm": 0.8539628982543945,
"learning_rate": 5.06360989228152e-06,
"loss": 0.5518,
"step": 8067
},
{
"epoch": 0.51,
"grad_norm": 0.9269511103630066,
"learning_rate": 5.062583980299002e-06,
"loss": 0.5955,
"step": 8068
},
{
"epoch": 0.51,
"grad_norm": 0.856561541557312,
"learning_rate": 5.061558065681288e-06,
"loss": 0.5672,
"step": 8069
},
{
"epoch": 0.51,
"grad_norm": 0.8841691017150879,
"learning_rate": 5.060532148471578e-06,
"loss": 0.6283,
"step": 8070
},
{
"epoch": 0.51,
"grad_norm": 0.832876980304718,
"learning_rate": 5.059506228713071e-06,
"loss": 0.5384,
"step": 8071
},
{
"epoch": 0.51,
"grad_norm": 0.8955254554748535,
"learning_rate": 5.058480306448965e-06,
"loss": 0.6119,
"step": 8072
},
{
"epoch": 0.51,
"grad_norm": 0.8828347325325012,
"learning_rate": 5.057454381722455e-06,
"loss": 0.5756,
"step": 8073
},
{
"epoch": 0.51,
"grad_norm": 0.9125185012817383,
"learning_rate": 5.056428454576741e-06,
"loss": 0.5958,
"step": 8074
},
{
"epoch": 0.51,
"grad_norm": 0.8723667860031128,
"learning_rate": 5.0554025250550195e-06,
"loss": 0.6055,
"step": 8075
},
{
"epoch": 0.51,
"grad_norm": 0.8208953738212585,
"learning_rate": 5.054376593200493e-06,
"loss": 0.5141,
"step": 8076
},
{
"epoch": 0.51,
"grad_norm": 0.8895772695541382,
"learning_rate": 5.053350659056356e-06,
"loss": 0.5693,
"step": 8077
},
{
"epoch": 0.51,
"grad_norm": 0.9235116243362427,
"learning_rate": 5.052324722665809e-06,
"loss": 0.6041,
"step": 8078
},
{
"epoch": 0.51,
"grad_norm": 0.9034695625305176,
"learning_rate": 5.0512987840720495e-06,
"loss": 0.5887,
"step": 8079
},
{
"epoch": 0.51,
"grad_norm": 0.9228042960166931,
"learning_rate": 5.0502728433182765e-06,
"loss": 0.5828,
"step": 8080
},
{
"epoch": 0.51,
"grad_norm": 0.927101731300354,
"learning_rate": 5.049246900447689e-06,
"loss": 0.6073,
"step": 8081
},
{
"epoch": 0.51,
"grad_norm": 0.8888689279556274,
"learning_rate": 5.048220955503487e-06,
"loss": 0.581,
"step": 8082
},
{
"epoch": 0.51,
"grad_norm": 0.8097081184387207,
"learning_rate": 5.047195008528868e-06,
"loss": 0.5485,
"step": 8083
},
{
"epoch": 0.51,
"grad_norm": 0.8781763315200806,
"learning_rate": 5.04616905956703e-06,
"loss": 0.555,
"step": 8084
},
{
"epoch": 0.51,
"grad_norm": 0.8688362836837769,
"learning_rate": 5.045143108661174e-06,
"loss": 0.5991,
"step": 8085
},
{
"epoch": 0.51,
"grad_norm": 0.8414211869239807,
"learning_rate": 5.044117155854499e-06,
"loss": 0.6283,
"step": 8086
},
{
"epoch": 0.51,
"grad_norm": 0.7982466816902161,
"learning_rate": 5.043091201190204e-06,
"loss": 0.5701,
"step": 8087
},
{
"epoch": 0.51,
"grad_norm": 0.8860836029052734,
"learning_rate": 5.042065244711488e-06,
"loss": 0.5306,
"step": 8088
},
{
"epoch": 0.51,
"grad_norm": 0.8865799903869629,
"learning_rate": 5.041039286461552e-06,
"loss": 0.5947,
"step": 8089
},
{
"epoch": 0.51,
"grad_norm": 0.8962934017181396,
"learning_rate": 5.040013326483593e-06,
"loss": 0.596,
"step": 8090
},
{
"epoch": 0.51,
"grad_norm": 1.0645703077316284,
"learning_rate": 5.038987364820813e-06,
"loss": 0.65,
"step": 8091
},
{
"epoch": 0.51,
"grad_norm": 0.8976729512214661,
"learning_rate": 5.037961401516411e-06,
"loss": 0.5364,
"step": 8092
},
{
"epoch": 0.51,
"grad_norm": 0.8850423097610474,
"learning_rate": 5.036935436613586e-06,
"loss": 0.5901,
"step": 8093
},
{
"epoch": 0.51,
"grad_norm": 0.9318758845329285,
"learning_rate": 5.0359094701555375e-06,
"loss": 0.5786,
"step": 8094
},
{
"epoch": 0.51,
"grad_norm": 0.8748635649681091,
"learning_rate": 5.034883502185467e-06,
"loss": 0.5917,
"step": 8095
},
{
"epoch": 0.51,
"grad_norm": 0.894095242023468,
"learning_rate": 5.033857532746573e-06,
"loss": 0.5896,
"step": 8096
},
{
"epoch": 0.51,
"grad_norm": 0.8149279952049255,
"learning_rate": 5.032831561882057e-06,
"loss": 0.5542,
"step": 8097
},
{
"epoch": 0.51,
"grad_norm": 0.852733850479126,
"learning_rate": 5.0318055896351185e-06,
"loss": 0.5888,
"step": 8098
},
{
"epoch": 0.51,
"grad_norm": 0.7920023202896118,
"learning_rate": 5.030779616048955e-06,
"loss": 0.513,
"step": 8099
},
{
"epoch": 0.51,
"grad_norm": 0.9614823460578918,
"learning_rate": 5.02975364116677e-06,
"loss": 0.594,
"step": 8100
},
{
"epoch": 0.51,
"grad_norm": 0.9171684980392456,
"learning_rate": 5.0287276650317626e-06,
"loss": 0.5822,
"step": 8101
},
{
"epoch": 0.51,
"grad_norm": 0.8981472849845886,
"learning_rate": 5.027701687687135e-06,
"loss": 0.601,
"step": 8102
},
{
"epoch": 0.51,
"grad_norm": 0.8312231302261353,
"learning_rate": 5.026675709176084e-06,
"loss": 0.5113,
"step": 8103
},
{
"epoch": 0.51,
"grad_norm": 0.915739893913269,
"learning_rate": 5.0256497295418115e-06,
"loss": 0.5998,
"step": 8104
},
{
"epoch": 0.51,
"grad_norm": 0.9062038660049438,
"learning_rate": 5.0246237488275185e-06,
"loss": 0.5541,
"step": 8105
},
{
"epoch": 0.51,
"grad_norm": 0.8854556679725647,
"learning_rate": 5.0235977670764055e-06,
"loss": 0.5467,
"step": 8106
},
{
"epoch": 0.51,
"grad_norm": 0.8781667947769165,
"learning_rate": 5.022571784331672e-06,
"loss": 0.6031,
"step": 8107
},
{
"epoch": 0.51,
"grad_norm": 0.8494471311569214,
"learning_rate": 5.021545800636519e-06,
"loss": 0.5906,
"step": 8108
},
{
"epoch": 0.51,
"grad_norm": 0.8764198422431946,
"learning_rate": 5.020519816034148e-06,
"loss": 0.5826,
"step": 8109
},
{
"epoch": 0.51,
"grad_norm": 0.9023407101631165,
"learning_rate": 5.019493830567758e-06,
"loss": 0.605,
"step": 8110
},
{
"epoch": 0.51,
"grad_norm": 0.8451856374740601,
"learning_rate": 5.018467844280553e-06,
"loss": 0.5689,
"step": 8111
},
{
"epoch": 0.51,
"grad_norm": 0.8786736130714417,
"learning_rate": 5.0174418572157276e-06,
"loss": 0.5584,
"step": 8112
},
{
"epoch": 0.51,
"grad_norm": 0.8404189348220825,
"learning_rate": 5.0164158694164884e-06,
"loss": 0.5621,
"step": 8113
},
{
"epoch": 0.51,
"grad_norm": 0.9702364802360535,
"learning_rate": 5.015389880926035e-06,
"loss": 0.614,
"step": 8114
},
{
"epoch": 0.51,
"grad_norm": 0.8589154481887817,
"learning_rate": 5.014363891787567e-06,
"loss": 0.5671,
"step": 8115
},
{
"epoch": 0.51,
"grad_norm": 0.9409849047660828,
"learning_rate": 5.013337902044283e-06,
"loss": 0.5915,
"step": 8116
},
{
"epoch": 0.51,
"grad_norm": 1.0001648664474487,
"learning_rate": 5.0123119117393894e-06,
"loss": 0.5942,
"step": 8117
},
{
"epoch": 0.51,
"grad_norm": 0.86786288022995,
"learning_rate": 5.011285920916082e-06,
"loss": 0.6033,
"step": 8118
},
{
"epoch": 0.51,
"grad_norm": 0.8556507229804993,
"learning_rate": 5.010259929617565e-06,
"loss": 0.6032,
"step": 8119
},
{
"epoch": 0.51,
"grad_norm": 0.9855061769485474,
"learning_rate": 5.009233937887036e-06,
"loss": 0.5679,
"step": 8120
},
{
"epoch": 0.51,
"grad_norm": 0.8764082789421082,
"learning_rate": 5.0082079457677e-06,
"loss": 0.558,
"step": 8121
},
{
"epoch": 0.51,
"grad_norm": 0.9630783796310425,
"learning_rate": 5.007181953302755e-06,
"loss": 0.6147,
"step": 8122
},
{
"epoch": 0.51,
"grad_norm": 0.882135808467865,
"learning_rate": 5.006155960535405e-06,
"loss": 0.606,
"step": 8123
},
{
"epoch": 0.51,
"grad_norm": 0.8694536685943604,
"learning_rate": 5.005129967508845e-06,
"loss": 0.6031,
"step": 8124
},
{
"epoch": 0.51,
"grad_norm": 0.8778092265129089,
"learning_rate": 5.004103974266284e-06,
"loss": 0.5793,
"step": 8125
},
{
"epoch": 0.51,
"grad_norm": 0.869263768196106,
"learning_rate": 5.0030779808509155e-06,
"loss": 0.5959,
"step": 8126
},
{
"epoch": 0.51,
"grad_norm": 0.8371315598487854,
"learning_rate": 5.002051987305947e-06,
"loss": 0.5804,
"step": 8127
},
{
"epoch": 0.51,
"grad_norm": 0.8696556091308594,
"learning_rate": 5.0010259936745735e-06,
"loss": 0.544,
"step": 8128
},
{
"epoch": 0.52,
"grad_norm": 0.8770456314086914,
"learning_rate": 5e-06,
"loss": 0.6189,
"step": 8129
},
{
"epoch": 0.52,
"grad_norm": 0.8599352240562439,
"learning_rate": 4.998974006325428e-06,
"loss": 0.5789,
"step": 8130
},
{
"epoch": 0.52,
"grad_norm": 0.9081400036811829,
"learning_rate": 4.997948012694056e-06,
"loss": 0.5858,
"step": 8131
},
{
"epoch": 0.52,
"grad_norm": 0.9235000014305115,
"learning_rate": 4.9969220191490845e-06,
"loss": 0.6132,
"step": 8132
},
{
"epoch": 0.52,
"grad_norm": 0.8584170341491699,
"learning_rate": 4.995896025733719e-06,
"loss": 0.6016,
"step": 8133
},
{
"epoch": 0.52,
"grad_norm": 0.9502587914466858,
"learning_rate": 4.994870032491156e-06,
"loss": 0.5802,
"step": 8134
},
{
"epoch": 0.52,
"grad_norm": 0.8924700021743774,
"learning_rate": 4.993844039464598e-06,
"loss": 0.6686,
"step": 8135
},
{
"epoch": 0.52,
"grad_norm": 0.8873922824859619,
"learning_rate": 4.992818046697245e-06,
"loss": 0.5937,
"step": 8136
},
{
"epoch": 0.52,
"grad_norm": 0.9198696613311768,
"learning_rate": 4.991792054232301e-06,
"loss": 0.5419,
"step": 8137
},
{
"epoch": 0.52,
"grad_norm": 0.8334248661994934,
"learning_rate": 4.990766062112966e-06,
"loss": 0.5722,
"step": 8138
},
{
"epoch": 0.52,
"grad_norm": 0.8983075022697449,
"learning_rate": 4.989740070382438e-06,
"loss": 0.588,
"step": 8139
},
{
"epoch": 0.52,
"grad_norm": 0.8269035220146179,
"learning_rate": 4.988714079083918e-06,
"loss": 0.5973,
"step": 8140
},
{
"epoch": 0.52,
"grad_norm": 1.0999228954315186,
"learning_rate": 4.987688088260613e-06,
"loss": 0.5795,
"step": 8141
},
{
"epoch": 0.52,
"grad_norm": 0.9255691766738892,
"learning_rate": 4.986662097955718e-06,
"loss": 0.5924,
"step": 8142
},
{
"epoch": 0.52,
"grad_norm": 0.8680478930473328,
"learning_rate": 4.985636108212435e-06,
"loss": 0.6024,
"step": 8143
},
{
"epoch": 0.52,
"grad_norm": 0.844215452671051,
"learning_rate": 4.984610119073965e-06,
"loss": 0.5356,
"step": 8144
},
{
"epoch": 0.52,
"grad_norm": 0.8514224886894226,
"learning_rate": 4.9835841305835115e-06,
"loss": 0.5734,
"step": 8145
},
{
"epoch": 0.52,
"grad_norm": 0.8678837418556213,
"learning_rate": 4.982558142784273e-06,
"loss": 0.6142,
"step": 8146
},
{
"epoch": 0.52,
"grad_norm": 0.8894163966178894,
"learning_rate": 4.98153215571945e-06,
"loss": 0.6196,
"step": 8147
},
{
"epoch": 0.52,
"grad_norm": 0.9071709513664246,
"learning_rate": 4.980506169432243e-06,
"loss": 0.5878,
"step": 8148
},
{
"epoch": 0.52,
"grad_norm": 0.9013687372207642,
"learning_rate": 4.979480183965852e-06,
"loss": 0.6077,
"step": 8149
},
{
"epoch": 0.52,
"grad_norm": 0.8970010876655579,
"learning_rate": 4.9784541993634824e-06,
"loss": 0.5885,
"step": 8150
},
{
"epoch": 0.52,
"grad_norm": 0.9553268551826477,
"learning_rate": 4.977428215668329e-06,
"loss": 0.6642,
"step": 8151
},
{
"epoch": 0.52,
"grad_norm": 0.8925964832305908,
"learning_rate": 4.976402232923597e-06,
"loss": 0.6099,
"step": 8152
},
{
"epoch": 0.52,
"grad_norm": 0.9235319495201111,
"learning_rate": 4.9753762511724815e-06,
"loss": 0.548,
"step": 8153
},
{
"epoch": 0.52,
"grad_norm": 0.8916828036308289,
"learning_rate": 4.974350270458189e-06,
"loss": 0.6115,
"step": 8154
},
{
"epoch": 0.52,
"grad_norm": 0.8752048015594482,
"learning_rate": 4.9733242908239175e-06,
"loss": 0.6241,
"step": 8155
},
{
"epoch": 0.52,
"grad_norm": 0.9507616758346558,
"learning_rate": 4.972298312312867e-06,
"loss": 0.6082,
"step": 8156
},
{
"epoch": 0.52,
"grad_norm": 0.9458578824996948,
"learning_rate": 4.9712723349682365e-06,
"loss": 0.5945,
"step": 8157
},
{
"epoch": 0.52,
"grad_norm": 0.8483637571334839,
"learning_rate": 4.970246358833231e-06,
"loss": 0.5418,
"step": 8158
},
{
"epoch": 0.52,
"grad_norm": 0.8706662654876709,
"learning_rate": 4.969220383951046e-06,
"loss": 0.5383,
"step": 8159
},
{
"epoch": 0.52,
"grad_norm": 0.843956470489502,
"learning_rate": 4.968194410364884e-06,
"loss": 0.5799,
"step": 8160
},
{
"epoch": 0.52,
"grad_norm": 0.887324333190918,
"learning_rate": 4.967168438117945e-06,
"loss": 0.5922,
"step": 8161
},
{
"epoch": 0.52,
"grad_norm": 0.8479996919631958,
"learning_rate": 4.966142467253428e-06,
"loss": 0.5402,
"step": 8162
},
{
"epoch": 0.52,
"grad_norm": 0.906588077545166,
"learning_rate": 4.965116497814534e-06,
"loss": 0.6009,
"step": 8163
},
{
"epoch": 0.52,
"grad_norm": 0.8441720008850098,
"learning_rate": 4.964090529844464e-06,
"loss": 0.5834,
"step": 8164
},
{
"epoch": 0.52,
"grad_norm": 0.8537503480911255,
"learning_rate": 4.963064563386416e-06,
"loss": 0.5948,
"step": 8165
},
{
"epoch": 0.52,
"grad_norm": 0.9106093049049377,
"learning_rate": 4.96203859848359e-06,
"loss": 0.5898,
"step": 8166
},
{
"epoch": 0.52,
"grad_norm": 0.9316169619560242,
"learning_rate": 4.961012635179188e-06,
"loss": 0.5515,
"step": 8167
},
{
"epoch": 0.52,
"grad_norm": 0.8743208646774292,
"learning_rate": 4.959986673516408e-06,
"loss": 0.5791,
"step": 8168
},
{
"epoch": 0.52,
"grad_norm": 0.878601610660553,
"learning_rate": 4.95896071353845e-06,
"loss": 0.5654,
"step": 8169
},
{
"epoch": 0.52,
"grad_norm": 0.9046491384506226,
"learning_rate": 4.9579347552885125e-06,
"loss": 0.6205,
"step": 8170
},
{
"epoch": 0.52,
"grad_norm": 0.9015951156616211,
"learning_rate": 4.956908798809797e-06,
"loss": 0.6079,
"step": 8171
},
{
"epoch": 0.52,
"grad_norm": 0.9551298022270203,
"learning_rate": 4.955882844145503e-06,
"loss": 0.6354,
"step": 8172
},
{
"epoch": 0.52,
"grad_norm": 0.9143627882003784,
"learning_rate": 4.954856891338827e-06,
"loss": 0.6224,
"step": 8173
},
{
"epoch": 0.52,
"grad_norm": 0.9006348252296448,
"learning_rate": 4.95383094043297e-06,
"loss": 0.5597,
"step": 8174
},
{
"epoch": 0.52,
"grad_norm": 0.8101087808609009,
"learning_rate": 4.952804991471134e-06,
"loss": 0.5693,
"step": 8175
},
{
"epoch": 0.52,
"grad_norm": 0.847748339176178,
"learning_rate": 4.951779044496515e-06,
"loss": 0.5625,
"step": 8176
},
{
"epoch": 0.52,
"grad_norm": 0.950564980506897,
"learning_rate": 4.9507530995523115e-06,
"loss": 0.5894,
"step": 8177
},
{
"epoch": 0.52,
"grad_norm": 0.8164709806442261,
"learning_rate": 4.949727156681726e-06,
"loss": 0.6046,
"step": 8178
},
{
"epoch": 0.52,
"grad_norm": 0.887380838394165,
"learning_rate": 4.948701215927951e-06,
"loss": 0.5241,
"step": 8179
},
{
"epoch": 0.52,
"grad_norm": 0.8414967060089111,
"learning_rate": 4.947675277334193e-06,
"loss": 0.5771,
"step": 8180
},
{
"epoch": 0.52,
"grad_norm": 0.9173058867454529,
"learning_rate": 4.946649340943645e-06,
"loss": 0.6376,
"step": 8181
},
{
"epoch": 0.52,
"grad_norm": 0.9363717436790466,
"learning_rate": 4.9456234067995094e-06,
"loss": 0.6236,
"step": 8182
},
{
"epoch": 0.52,
"grad_norm": 0.8463205099105835,
"learning_rate": 4.9445974749449805e-06,
"loss": 0.555,
"step": 8183
},
{
"epoch": 0.52,
"grad_norm": 0.8751280307769775,
"learning_rate": 4.9435715454232615e-06,
"loss": 0.584,
"step": 8184
},
{
"epoch": 0.52,
"grad_norm": 0.9037527441978455,
"learning_rate": 4.942545618277547e-06,
"loss": 0.614,
"step": 8185
},
{
"epoch": 0.52,
"grad_norm": 0.8870174884796143,
"learning_rate": 4.9415196935510375e-06,
"loss": 0.5755,
"step": 8186
},
{
"epoch": 0.52,
"grad_norm": 0.9150660037994385,
"learning_rate": 4.940493771286929e-06,
"loss": 0.5779,
"step": 8187
},
{
"epoch": 0.52,
"grad_norm": 0.8672343492507935,
"learning_rate": 4.939467851528423e-06,
"loss": 0.5905,
"step": 8188
},
{
"epoch": 0.52,
"grad_norm": 0.8112956881523132,
"learning_rate": 4.938441934318713e-06,
"loss": 0.5317,
"step": 8189
},
{
"epoch": 0.52,
"grad_norm": 0.8447852730751038,
"learning_rate": 4.937416019701e-06,
"loss": 0.5753,
"step": 8190
},
{
"epoch": 0.52,
"grad_norm": 0.8433228135108948,
"learning_rate": 4.93639010771848e-06,
"loss": 0.5283,
"step": 8191
},
{
"epoch": 0.52,
"grad_norm": 0.8930540084838867,
"learning_rate": 4.9353641984143526e-06,
"loss": 0.5907,
"step": 8192
},
{
"epoch": 0.52,
"grad_norm": 0.8250675201416016,
"learning_rate": 4.934338291831813e-06,
"loss": 0.5775,
"step": 8193
},
{
"epoch": 0.52,
"grad_norm": 0.8587763905525208,
"learning_rate": 4.93331238801406e-06,
"loss": 0.5706,
"step": 8194
},
{
"epoch": 0.52,
"grad_norm": 0.9937714338302612,
"learning_rate": 4.932286487004291e-06,
"loss": 0.6685,
"step": 8195
},
{
"epoch": 0.52,
"grad_norm": 0.8941221833229065,
"learning_rate": 4.931260588845701e-06,
"loss": 0.5856,
"step": 8196
},
{
"epoch": 0.52,
"grad_norm": 0.8236309885978699,
"learning_rate": 4.930234693581489e-06,
"loss": 0.595,
"step": 8197
},
{
"epoch": 0.52,
"grad_norm": 0.8598278760910034,
"learning_rate": 4.929208801254851e-06,
"loss": 0.5957,
"step": 8198
},
{
"epoch": 0.52,
"grad_norm": 0.9491175413131714,
"learning_rate": 4.928182911908987e-06,
"loss": 0.6515,
"step": 8199
},
{
"epoch": 0.52,
"grad_norm": 0.847444474697113,
"learning_rate": 4.927157025587086e-06,
"loss": 0.5541,
"step": 8200
},
{
"epoch": 0.52,
"grad_norm": 0.9040679335594177,
"learning_rate": 4.926131142332351e-06,
"loss": 0.6053,
"step": 8201
},
{
"epoch": 0.52,
"grad_norm": 0.8832661509513855,
"learning_rate": 4.925105262187978e-06,
"loss": 0.6243,
"step": 8202
},
{
"epoch": 0.52,
"grad_norm": 0.8717993497848511,
"learning_rate": 4.924079385197162e-06,
"loss": 0.563,
"step": 8203
},
{
"epoch": 0.52,
"grad_norm": 0.8877679705619812,
"learning_rate": 4.923053511403096e-06,
"loss": 0.6599,
"step": 8204
},
{
"epoch": 0.52,
"grad_norm": 0.8722405433654785,
"learning_rate": 4.922027640848981e-06,
"loss": 0.5793,
"step": 8205
},
{
"epoch": 0.52,
"grad_norm": 0.9440850019454956,
"learning_rate": 4.921001773578012e-06,
"loss": 0.6429,
"step": 8206
},
{
"epoch": 0.52,
"grad_norm": 0.9616214632987976,
"learning_rate": 4.9199759096333825e-06,
"loss": 0.6532,
"step": 8207
},
{
"epoch": 0.52,
"grad_norm": 0.8866004943847656,
"learning_rate": 4.918950049058289e-06,
"loss": 0.5907,
"step": 8208
},
{
"epoch": 0.52,
"grad_norm": 0.8617315888404846,
"learning_rate": 4.9179241918959255e-06,
"loss": 0.6039,
"step": 8209
},
{
"epoch": 0.52,
"grad_norm": 0.8040612936019897,
"learning_rate": 4.916898338189491e-06,
"loss": 0.5269,
"step": 8210
},
{
"epoch": 0.52,
"grad_norm": 0.8695709705352783,
"learning_rate": 4.9158724879821775e-06,
"loss": 0.5651,
"step": 8211
},
{
"epoch": 0.52,
"grad_norm": 0.8399918675422668,
"learning_rate": 4.914846641317181e-06,
"loss": 0.5193,
"step": 8212
},
{
"epoch": 0.52,
"grad_norm": 0.8823307752609253,
"learning_rate": 4.913820798237695e-06,
"loss": 0.5814,
"step": 8213
},
{
"epoch": 0.52,
"grad_norm": 0.9517965912818909,
"learning_rate": 4.912794958786917e-06,
"loss": 0.5904,
"step": 8214
},
{
"epoch": 0.52,
"grad_norm": 0.9135156273841858,
"learning_rate": 4.91176912300804e-06,
"loss": 0.5795,
"step": 8215
},
{
"epoch": 0.52,
"grad_norm": 1.0179460048675537,
"learning_rate": 4.9107432909442575e-06,
"loss": 0.5925,
"step": 8216
},
{
"epoch": 0.52,
"grad_norm": 0.91028892993927,
"learning_rate": 4.909717462638763e-06,
"loss": 0.625,
"step": 8217
},
{
"epoch": 0.52,
"grad_norm": 0.9520250558853149,
"learning_rate": 4.908691638134754e-06,
"loss": 0.6201,
"step": 8218
},
{
"epoch": 0.52,
"grad_norm": 0.897201418876648,
"learning_rate": 4.907665817475424e-06,
"loss": 0.5532,
"step": 8219
},
{
"epoch": 0.52,
"grad_norm": 0.8576155304908752,
"learning_rate": 4.906640000703963e-06,
"loss": 0.5918,
"step": 8220
},
{
"epoch": 0.52,
"grad_norm": 0.8770981431007385,
"learning_rate": 4.905614187863565e-06,
"loss": 0.6275,
"step": 8221
},
{
"epoch": 0.52,
"grad_norm": 1.008365273475647,
"learning_rate": 4.904588378997428e-06,
"loss": 0.6307,
"step": 8222
},
{
"epoch": 0.52,
"grad_norm": 0.8657634258270264,
"learning_rate": 4.903562574148744e-06,
"loss": 0.6345,
"step": 8223
},
{
"epoch": 0.52,
"grad_norm": 0.9766127467155457,
"learning_rate": 4.902536773360702e-06,
"loss": 0.5598,
"step": 8224
},
{
"epoch": 0.52,
"grad_norm": 0.8664228916168213,
"learning_rate": 4.9015109766764985e-06,
"loss": 0.6031,
"step": 8225
},
{
"epoch": 0.52,
"grad_norm": 0.8865102529525757,
"learning_rate": 4.900485184139323e-06,
"loss": 0.5766,
"step": 8226
},
{
"epoch": 0.52,
"grad_norm": 0.9038271307945251,
"learning_rate": 4.899459395792373e-06,
"loss": 0.6025,
"step": 8227
},
{
"epoch": 0.52,
"grad_norm": 0.8609294295310974,
"learning_rate": 4.8984336116788355e-06,
"loss": 0.5279,
"step": 8228
},
{
"epoch": 0.52,
"grad_norm": 0.846961259841919,
"learning_rate": 4.897407831841908e-06,
"loss": 0.5631,
"step": 8229
},
{
"epoch": 0.52,
"grad_norm": 0.8961449861526489,
"learning_rate": 4.8963820563247765e-06,
"loss": 0.5892,
"step": 8230
},
{
"epoch": 0.52,
"grad_norm": 0.9013886451721191,
"learning_rate": 4.8953562851706385e-06,
"loss": 0.5458,
"step": 8231
},
{
"epoch": 0.52,
"grad_norm": 0.8823043704032898,
"learning_rate": 4.894330518422683e-06,
"loss": 0.5935,
"step": 8232
},
{
"epoch": 0.52,
"grad_norm": 0.8829339742660522,
"learning_rate": 4.893304756124102e-06,
"loss": 0.5716,
"step": 8233
},
{
"epoch": 0.52,
"grad_norm": 0.8946317434310913,
"learning_rate": 4.8922789983180854e-06,
"loss": 0.6174,
"step": 8234
},
{
"epoch": 0.52,
"grad_norm": 0.8930938839912415,
"learning_rate": 4.891253245047826e-06,
"loss": 0.5584,
"step": 8235
},
{
"epoch": 0.52,
"grad_norm": 0.8877846002578735,
"learning_rate": 4.890227496356515e-06,
"loss": 0.5851,
"step": 8236
},
{
"epoch": 0.52,
"grad_norm": 0.8552438616752625,
"learning_rate": 4.889201752287342e-06,
"loss": 0.5844,
"step": 8237
},
{
"epoch": 0.52,
"grad_norm": 0.9162623882293701,
"learning_rate": 4.888176012883496e-06,
"loss": 0.6057,
"step": 8238
},
{
"epoch": 0.52,
"grad_norm": 0.8288585543632507,
"learning_rate": 4.88715027818817e-06,
"loss": 0.5871,
"step": 8239
},
{
"epoch": 0.52,
"grad_norm": 0.8896382451057434,
"learning_rate": 4.886124548244555e-06,
"loss": 0.6483,
"step": 8240
},
{
"epoch": 0.52,
"grad_norm": 0.9036986231803894,
"learning_rate": 4.885098823095838e-06,
"loss": 0.609,
"step": 8241
},
{
"epoch": 0.52,
"grad_norm": 0.828501284122467,
"learning_rate": 4.884073102785209e-06,
"loss": 0.5929,
"step": 8242
},
{
"epoch": 0.52,
"grad_norm": 0.8982778191566467,
"learning_rate": 4.883047387355858e-06,
"loss": 0.5726,
"step": 8243
},
{
"epoch": 0.52,
"grad_norm": 0.9407196640968323,
"learning_rate": 4.882021676850977e-06,
"loss": 0.5888,
"step": 8244
},
{
"epoch": 0.52,
"grad_norm": 0.9057026505470276,
"learning_rate": 4.880995971313752e-06,
"loss": 0.5436,
"step": 8245
},
{
"epoch": 0.52,
"grad_norm": 0.8921209573745728,
"learning_rate": 4.879970270787372e-06,
"loss": 0.6365,
"step": 8246
},
{
"epoch": 0.52,
"grad_norm": 0.9471856951713562,
"learning_rate": 4.878944575315025e-06,
"loss": 0.5888,
"step": 8247
},
{
"epoch": 0.52,
"grad_norm": 0.8575695753097534,
"learning_rate": 4.877918884939903e-06,
"loss": 0.599,
"step": 8248
},
{
"epoch": 0.52,
"grad_norm": 0.9358868598937988,
"learning_rate": 4.8768931997051925e-06,
"loss": 0.5986,
"step": 8249
},
{
"epoch": 0.52,
"grad_norm": 0.8470869660377502,
"learning_rate": 4.8758675196540795e-06,
"loss": 0.5713,
"step": 8250
},
{
"epoch": 0.52,
"grad_norm": 0.8792859315872192,
"learning_rate": 4.874841844829753e-06,
"loss": 0.5646,
"step": 8251
},
{
"epoch": 0.52,
"grad_norm": 0.870421826839447,
"learning_rate": 4.873816175275402e-06,
"loss": 0.5701,
"step": 8252
},
{
"epoch": 0.52,
"grad_norm": 0.882820188999176,
"learning_rate": 4.8727905110342135e-06,
"loss": 0.6186,
"step": 8253
},
{
"epoch": 0.52,
"grad_norm": 0.8869359493255615,
"learning_rate": 4.871764852149373e-06,
"loss": 0.6131,
"step": 8254
},
{
"epoch": 0.52,
"grad_norm": 0.870141327381134,
"learning_rate": 4.87073919866407e-06,
"loss": 0.5999,
"step": 8255
},
{
"epoch": 0.52,
"grad_norm": 0.8610088229179382,
"learning_rate": 4.869713550621487e-06,
"loss": 0.5949,
"step": 8256
},
{
"epoch": 0.52,
"grad_norm": 0.8822341561317444,
"learning_rate": 4.868687908064815e-06,
"loss": 0.5805,
"step": 8257
},
{
"epoch": 0.52,
"grad_norm": 0.8881772756576538,
"learning_rate": 4.867662271037238e-06,
"loss": 0.5319,
"step": 8258
},
{
"epoch": 0.52,
"grad_norm": 0.9677478075027466,
"learning_rate": 4.866636639581943e-06,
"loss": 0.5925,
"step": 8259
},
{
"epoch": 0.52,
"grad_norm": 0.8670486211776733,
"learning_rate": 4.865611013742114e-06,
"loss": 0.5811,
"step": 8260
},
{
"epoch": 0.52,
"grad_norm": 0.8827394247055054,
"learning_rate": 4.864585393560939e-06,
"loss": 0.5945,
"step": 8261
},
{
"epoch": 0.52,
"grad_norm": 0.9279113411903381,
"learning_rate": 4.863559779081601e-06,
"loss": 0.5824,
"step": 8262
},
{
"epoch": 0.52,
"grad_norm": 0.8230646848678589,
"learning_rate": 4.862534170347287e-06,
"loss": 0.5946,
"step": 8263
},
{
"epoch": 0.52,
"grad_norm": 0.8288192749023438,
"learning_rate": 4.861508567401179e-06,
"loss": 0.5486,
"step": 8264
},
{
"epoch": 0.52,
"grad_norm": 0.882305383682251,
"learning_rate": 4.860482970286465e-06,
"loss": 0.5531,
"step": 8265
},
{
"epoch": 0.52,
"grad_norm": 0.881271481513977,
"learning_rate": 4.859457379046327e-06,
"loss": 0.577,
"step": 8266
},
{
"epoch": 0.52,
"grad_norm": 0.8755255937576294,
"learning_rate": 4.858431793723952e-06,
"loss": 0.5614,
"step": 8267
},
{
"epoch": 0.52,
"grad_norm": 0.8271751999855042,
"learning_rate": 4.857406214362518e-06,
"loss": 0.5615,
"step": 8268
},
{
"epoch": 0.52,
"grad_norm": 0.9304192066192627,
"learning_rate": 4.856380641005215e-06,
"loss": 0.5808,
"step": 8269
},
{
"epoch": 0.52,
"grad_norm": 0.8733910918235779,
"learning_rate": 4.855355073695223e-06,
"loss": 0.6571,
"step": 8270
},
{
"epoch": 0.52,
"grad_norm": 0.944700300693512,
"learning_rate": 4.8543295124757265e-06,
"loss": 0.5915,
"step": 8271
},
{
"epoch": 0.52,
"grad_norm": 0.9210183024406433,
"learning_rate": 4.8533039573899075e-06,
"loss": 0.6014,
"step": 8272
},
{
"epoch": 0.52,
"grad_norm": 0.8870010375976562,
"learning_rate": 4.852278408480946e-06,
"loss": 0.5976,
"step": 8273
},
{
"epoch": 0.52,
"grad_norm": 1.0010098218917847,
"learning_rate": 4.8512528657920275e-06,
"loss": 0.5804,
"step": 8274
},
{
"epoch": 0.52,
"grad_norm": 0.9052338600158691,
"learning_rate": 4.850227329366335e-06,
"loss": 0.6216,
"step": 8275
},
{
"epoch": 0.52,
"grad_norm": 0.8478895425796509,
"learning_rate": 4.849201799247049e-06,
"loss": 0.5468,
"step": 8276
},
{
"epoch": 0.52,
"grad_norm": 0.8541980981826782,
"learning_rate": 4.848176275477348e-06,
"loss": 0.5529,
"step": 8277
},
{
"epoch": 0.52,
"grad_norm": 0.881534218788147,
"learning_rate": 4.847150758100418e-06,
"loss": 0.581,
"step": 8278
},
{
"epoch": 0.52,
"grad_norm": 0.8824727535247803,
"learning_rate": 4.846125247159437e-06,
"loss": 0.5844,
"step": 8279
},
{
"epoch": 0.52,
"grad_norm": 0.861589252948761,
"learning_rate": 4.845099742697588e-06,
"loss": 0.5607,
"step": 8280
},
{
"epoch": 0.52,
"grad_norm": 0.8586124777793884,
"learning_rate": 4.844074244758047e-06,
"loss": 0.5151,
"step": 8281
},
{
"epoch": 0.52,
"grad_norm": 0.9040012955665588,
"learning_rate": 4.843048753383998e-06,
"loss": 0.586,
"step": 8282
},
{
"epoch": 0.52,
"grad_norm": 0.8967165350914001,
"learning_rate": 4.8420232686186226e-06,
"loss": 0.5654,
"step": 8283
},
{
"epoch": 0.52,
"grad_norm": 0.8572660684585571,
"learning_rate": 4.840997790505097e-06,
"loss": 0.5538,
"step": 8284
},
{
"epoch": 0.52,
"grad_norm": 0.859514594078064,
"learning_rate": 4.8399723190866e-06,
"loss": 0.5347,
"step": 8285
},
{
"epoch": 0.52,
"grad_norm": 0.8236177563667297,
"learning_rate": 4.838946854406311e-06,
"loss": 0.5735,
"step": 8286
},
{
"epoch": 0.53,
"grad_norm": 0.8584608435630798,
"learning_rate": 4.8379213965074125e-06,
"loss": 0.5974,
"step": 8287
},
{
"epoch": 0.53,
"grad_norm": 0.8580573797225952,
"learning_rate": 4.83689594543308e-06,
"loss": 0.5857,
"step": 8288
},
{
"epoch": 0.53,
"grad_norm": 0.898115873336792,
"learning_rate": 4.835870501226489e-06,
"loss": 0.6063,
"step": 8289
},
{
"epoch": 0.53,
"grad_norm": 0.8824769258499146,
"learning_rate": 4.834845063930821e-06,
"loss": 0.5794,
"step": 8290
},
{
"epoch": 0.53,
"grad_norm": 0.7949787378311157,
"learning_rate": 4.833819633589254e-06,
"loss": 0.5864,
"step": 8291
},
{
"epoch": 0.53,
"grad_norm": 0.8064171671867371,
"learning_rate": 4.832794210244965e-06,
"loss": 0.5185,
"step": 8292
},
{
"epoch": 0.53,
"grad_norm": 0.9789409041404724,
"learning_rate": 4.831768793941129e-06,
"loss": 0.6399,
"step": 8293
},
{
"epoch": 0.53,
"grad_norm": 0.8709642887115479,
"learning_rate": 4.830743384720922e-06,
"loss": 0.5817,
"step": 8294
},
{
"epoch": 0.53,
"grad_norm": 0.9149221181869507,
"learning_rate": 4.829717982627525e-06,
"loss": 0.5949,
"step": 8295
},
{
"epoch": 0.53,
"grad_norm": 0.8690757751464844,
"learning_rate": 4.82869258770411e-06,
"loss": 0.6369,
"step": 8296
},
{
"epoch": 0.53,
"grad_norm": 0.8303024172782898,
"learning_rate": 4.827667199993855e-06,
"loss": 0.5615,
"step": 8297
},
{
"epoch": 0.53,
"grad_norm": 0.8637316226959229,
"learning_rate": 4.826641819539933e-06,
"loss": 0.557,
"step": 8298
},
{
"epoch": 0.53,
"grad_norm": 0.8349990844726562,
"learning_rate": 4.825616446385523e-06,
"loss": 0.5814,
"step": 8299
},
{
"epoch": 0.53,
"grad_norm": 0.8609099388122559,
"learning_rate": 4.824591080573797e-06,
"loss": 0.5872,
"step": 8300
},
{
"epoch": 0.53,
"grad_norm": 0.92775958776474,
"learning_rate": 4.823565722147932e-06,
"loss": 0.6211,
"step": 8301
},
{
"epoch": 0.53,
"grad_norm": 0.8916222453117371,
"learning_rate": 4.8225403711511e-06,
"loss": 0.5705,
"step": 8302
},
{
"epoch": 0.53,
"grad_norm": 0.8630041480064392,
"learning_rate": 4.821515027626473e-06,
"loss": 0.5799,
"step": 8303
},
{
"epoch": 0.53,
"grad_norm": 0.8404906988143921,
"learning_rate": 4.8204896916172285e-06,
"loss": 0.5419,
"step": 8304
},
{
"epoch": 0.53,
"grad_norm": 0.8835939168930054,
"learning_rate": 4.819464363166539e-06,
"loss": 0.5335,
"step": 8305
},
{
"epoch": 0.53,
"grad_norm": 0.9106584191322327,
"learning_rate": 4.818439042317578e-06,
"loss": 0.5901,
"step": 8306
},
{
"epoch": 0.53,
"grad_norm": 0.8627772331237793,
"learning_rate": 4.817413729113516e-06,
"loss": 0.5799,
"step": 8307
},
{
"epoch": 0.53,
"grad_norm": 0.9338002800941467,
"learning_rate": 4.816388423597527e-06,
"loss": 0.5736,
"step": 8308
},
{
"epoch": 0.53,
"grad_norm": 0.9331300258636475,
"learning_rate": 4.815363125812784e-06,
"loss": 0.6421,
"step": 8309
},
{
"epoch": 0.53,
"grad_norm": 0.8660625219345093,
"learning_rate": 4.814337835802457e-06,
"loss": 0.614,
"step": 8310
},
{
"epoch": 0.53,
"grad_norm": 0.8572609424591064,
"learning_rate": 4.813312553609716e-06,
"loss": 0.5237,
"step": 8311
},
{
"epoch": 0.53,
"grad_norm": 0.8259177207946777,
"learning_rate": 4.812287279277735e-06,
"loss": 0.5701,
"step": 8312
},
{
"epoch": 0.53,
"grad_norm": 0.853283703327179,
"learning_rate": 4.811262012849685e-06,
"loss": 0.5947,
"step": 8313
},
{
"epoch": 0.53,
"grad_norm": 0.885016143321991,
"learning_rate": 4.810236754368735e-06,
"loss": 0.6032,
"step": 8314
},
{
"epoch": 0.53,
"grad_norm": 0.8650339841842651,
"learning_rate": 4.8092115038780525e-06,
"loss": 0.6111,
"step": 8315
},
{
"epoch": 0.53,
"grad_norm": 0.8747149109840393,
"learning_rate": 4.808186261420811e-06,
"loss": 0.5894,
"step": 8316
},
{
"epoch": 0.53,
"grad_norm": 0.8412078619003296,
"learning_rate": 4.80716102704018e-06,
"loss": 0.581,
"step": 8317
},
{
"epoch": 0.53,
"grad_norm": 0.917317271232605,
"learning_rate": 4.806135800779328e-06,
"loss": 0.5797,
"step": 8318
},
{
"epoch": 0.53,
"grad_norm": 0.8281989693641663,
"learning_rate": 4.805110582681421e-06,
"loss": 0.5697,
"step": 8319
},
{
"epoch": 0.53,
"grad_norm": 0.9350634217262268,
"learning_rate": 4.804085372789629e-06,
"loss": 0.6051,
"step": 8320
},
{
"epoch": 0.53,
"grad_norm": 0.9457853436470032,
"learning_rate": 4.803060171147122e-06,
"loss": 0.6187,
"step": 8321
},
{
"epoch": 0.53,
"grad_norm": 0.9334213733673096,
"learning_rate": 4.802034977797066e-06,
"loss": 0.6349,
"step": 8322
},
{
"epoch": 0.53,
"grad_norm": 0.8959923982620239,
"learning_rate": 4.801009792782627e-06,
"loss": 0.5949,
"step": 8323
},
{
"epoch": 0.53,
"grad_norm": 0.8187436461448669,
"learning_rate": 4.799984616146974e-06,
"loss": 0.5693,
"step": 8324
},
{
"epoch": 0.53,
"grad_norm": 0.896421492099762,
"learning_rate": 4.798959447933274e-06,
"loss": 0.6583,
"step": 8325
},
{
"epoch": 0.53,
"grad_norm": 0.9043596386909485,
"learning_rate": 4.797934288184692e-06,
"loss": 0.5758,
"step": 8326
},
{
"epoch": 0.53,
"grad_norm": 0.959918200969696,
"learning_rate": 4.796909136944394e-06,
"loss": 0.6453,
"step": 8327
},
{
"epoch": 0.53,
"grad_norm": 0.855787992477417,
"learning_rate": 4.795883994255544e-06,
"loss": 0.5633,
"step": 8328
},
{
"epoch": 0.53,
"grad_norm": 0.9476739764213562,
"learning_rate": 4.794858860161311e-06,
"loss": 0.674,
"step": 8329
},
{
"epoch": 0.53,
"grad_norm": 0.8766798973083496,
"learning_rate": 4.793833734704858e-06,
"loss": 0.6058,
"step": 8330
},
{
"epoch": 0.53,
"grad_norm": 0.8943171501159668,
"learning_rate": 4.792808617929348e-06,
"loss": 0.59,
"step": 8331
},
{
"epoch": 0.53,
"grad_norm": 0.8322863578796387,
"learning_rate": 4.791783509877948e-06,
"loss": 0.5921,
"step": 8332
},
{
"epoch": 0.53,
"grad_norm": 0.9394057393074036,
"learning_rate": 4.790758410593818e-06,
"loss": 0.6143,
"step": 8333
},
{
"epoch": 0.53,
"grad_norm": 0.9464383721351624,
"learning_rate": 4.789733320120124e-06,
"loss": 0.5695,
"step": 8334
},
{
"epoch": 0.53,
"grad_norm": 0.8929427266120911,
"learning_rate": 4.788708238500029e-06,
"loss": 0.5768,
"step": 8335
},
{
"epoch": 0.53,
"grad_norm": 0.8872730731964111,
"learning_rate": 4.787683165776695e-06,
"loss": 0.5809,
"step": 8336
},
{
"epoch": 0.53,
"grad_norm": 0.8962015509605408,
"learning_rate": 4.786658101993283e-06,
"loss": 0.6007,
"step": 8337
},
{
"epoch": 0.53,
"grad_norm": 0.8641744256019592,
"learning_rate": 4.785633047192959e-06,
"loss": 0.5726,
"step": 8338
},
{
"epoch": 0.53,
"grad_norm": 0.9444864988327026,
"learning_rate": 4.7846080014188786e-06,
"loss": 0.6105,
"step": 8339
},
{
"epoch": 0.53,
"grad_norm": 0.8568362593650818,
"learning_rate": 4.783582964714209e-06,
"loss": 0.6058,
"step": 8340
},
{
"epoch": 0.53,
"grad_norm": 0.8523517847061157,
"learning_rate": 4.782557937122104e-06,
"loss": 0.5627,
"step": 8341
},
{
"epoch": 0.53,
"grad_norm": 0.9169915914535522,
"learning_rate": 4.781532918685731e-06,
"loss": 0.556,
"step": 8342
},
{
"epoch": 0.53,
"grad_norm": 0.9116235375404358,
"learning_rate": 4.780507909448246e-06,
"loss": 0.6041,
"step": 8343
},
{
"epoch": 0.53,
"grad_norm": 0.9121682047843933,
"learning_rate": 4.77948290945281e-06,
"loss": 0.5696,
"step": 8344
},
{
"epoch": 0.53,
"grad_norm": 0.9193983674049377,
"learning_rate": 4.778457918742579e-06,
"loss": 0.5995,
"step": 8345
},
{
"epoch": 0.53,
"grad_norm": 0.8698511123657227,
"learning_rate": 4.777432937360716e-06,
"loss": 0.6134,
"step": 8346
},
{
"epoch": 0.53,
"grad_norm": 0.9621423482894897,
"learning_rate": 4.776407965350378e-06,
"loss": 0.5889,
"step": 8347
},
{
"epoch": 0.53,
"grad_norm": 0.9202246069908142,
"learning_rate": 4.775383002754723e-06,
"loss": 0.6282,
"step": 8348
},
{
"epoch": 0.53,
"grad_norm": 0.8784829378128052,
"learning_rate": 4.7743580496169095e-06,
"loss": 0.6325,
"step": 8349
},
{
"epoch": 0.53,
"grad_norm": 0.8858938813209534,
"learning_rate": 4.773333105980091e-06,
"loss": 0.5691,
"step": 8350
},
{
"epoch": 0.53,
"grad_norm": 0.8641536831855774,
"learning_rate": 4.772308171887427e-06,
"loss": 0.5179,
"step": 8351
},
{
"epoch": 0.53,
"grad_norm": 0.9512357115745544,
"learning_rate": 4.771283247382076e-06,
"loss": 0.6028,
"step": 8352
},
{
"epoch": 0.53,
"grad_norm": 0.852192223072052,
"learning_rate": 4.770258332507191e-06,
"loss": 0.5482,
"step": 8353
},
{
"epoch": 0.53,
"grad_norm": 0.8949208855628967,
"learning_rate": 4.7692334273059265e-06,
"loss": 0.6007,
"step": 8354
},
{
"epoch": 0.53,
"grad_norm": 0.9022393822669983,
"learning_rate": 4.768208531821441e-06,
"loss": 0.5518,
"step": 8355
},
{
"epoch": 0.53,
"grad_norm": 0.8701500296592712,
"learning_rate": 4.767183646096889e-06,
"loss": 0.5991,
"step": 8356
},
{
"epoch": 0.53,
"grad_norm": 0.8898680806159973,
"learning_rate": 4.766158770175422e-06,
"loss": 0.6007,
"step": 8357
},
{
"epoch": 0.53,
"grad_norm": 0.8867197036743164,
"learning_rate": 4.765133904100196e-06,
"loss": 0.5388,
"step": 8358
},
{
"epoch": 0.53,
"grad_norm": 0.9473680257797241,
"learning_rate": 4.764109047914365e-06,
"loss": 0.582,
"step": 8359
},
{
"epoch": 0.53,
"grad_norm": 0.9777132272720337,
"learning_rate": 4.763084201661081e-06,
"loss": 0.5981,
"step": 8360
},
{
"epoch": 0.53,
"grad_norm": 0.9255326390266418,
"learning_rate": 4.762059365383497e-06,
"loss": 0.6236,
"step": 8361
},
{
"epoch": 0.53,
"grad_norm": 0.834649920463562,
"learning_rate": 4.761034539124765e-06,
"loss": 0.5596,
"step": 8362
},
{
"epoch": 0.53,
"grad_norm": 0.970477819442749,
"learning_rate": 4.760009722928038e-06,
"loss": 0.6285,
"step": 8363
},
{
"epoch": 0.53,
"grad_norm": 0.9015950560569763,
"learning_rate": 4.7589849168364675e-06,
"loss": 0.5778,
"step": 8364
},
{
"epoch": 0.53,
"grad_norm": 0.9307251572608948,
"learning_rate": 4.7579601208932015e-06,
"loss": 0.6193,
"step": 8365
},
{
"epoch": 0.53,
"grad_norm": 0.8972157835960388,
"learning_rate": 4.756935335141395e-06,
"loss": 0.5971,
"step": 8366
},
{
"epoch": 0.53,
"grad_norm": 0.8648176193237305,
"learning_rate": 4.755910559624194e-06,
"loss": 0.5711,
"step": 8367
},
{
"epoch": 0.53,
"grad_norm": 0.8773466348648071,
"learning_rate": 4.754885794384752e-06,
"loss": 0.5989,
"step": 8368
},
{
"epoch": 0.53,
"grad_norm": 0.9213095903396606,
"learning_rate": 4.7538610394662156e-06,
"loss": 0.5402,
"step": 8369
},
{
"epoch": 0.53,
"grad_norm": 0.9090799689292908,
"learning_rate": 4.7528362949117355e-06,
"loss": 0.5578,
"step": 8370
},
{
"epoch": 0.53,
"grad_norm": 0.8713887929916382,
"learning_rate": 4.751811560764457e-06,
"loss": 0.5654,
"step": 8371
},
{
"epoch": 0.53,
"grad_norm": 0.8781121373176575,
"learning_rate": 4.750786837067532e-06,
"loss": 0.5971,
"step": 8372
},
{
"epoch": 0.53,
"grad_norm": 0.955092191696167,
"learning_rate": 4.7497621238641055e-06,
"loss": 0.612,
"step": 8373
},
{
"epoch": 0.53,
"grad_norm": 0.9178210496902466,
"learning_rate": 4.7487374211973266e-06,
"loss": 0.6232,
"step": 8374
},
{
"epoch": 0.53,
"grad_norm": 0.8522612452507019,
"learning_rate": 4.747712729110339e-06,
"loss": 0.5371,
"step": 8375
},
{
"epoch": 0.53,
"grad_norm": 0.8494625687599182,
"learning_rate": 4.746688047646293e-06,
"loss": 0.5617,
"step": 8376
},
{
"epoch": 0.53,
"grad_norm": 0.8461270332336426,
"learning_rate": 4.745663376848331e-06,
"loss": 0.5576,
"step": 8377
},
{
"epoch": 0.53,
"grad_norm": 0.9114232659339905,
"learning_rate": 4.744638716759599e-06,
"loss": 0.6225,
"step": 8378
},
{
"epoch": 0.53,
"grad_norm": 0.8230855464935303,
"learning_rate": 4.743614067423245e-06,
"loss": 0.6225,
"step": 8379
},
{
"epoch": 0.53,
"grad_norm": 0.8851600885391235,
"learning_rate": 4.742589428882406e-06,
"loss": 0.5544,
"step": 8380
},
{
"epoch": 0.53,
"grad_norm": 0.8462688326835632,
"learning_rate": 4.7415648011802335e-06,
"loss": 0.6182,
"step": 8381
},
{
"epoch": 0.53,
"grad_norm": 0.8912700414657593,
"learning_rate": 4.7405401843598686e-06,
"loss": 0.5913,
"step": 8382
},
{
"epoch": 0.53,
"grad_norm": 0.8330943584442139,
"learning_rate": 4.739515578464454e-06,
"loss": 0.5526,
"step": 8383
},
{
"epoch": 0.53,
"grad_norm": 0.9202174544334412,
"learning_rate": 4.73849098353713e-06,
"loss": 0.6407,
"step": 8384
},
{
"epoch": 0.53,
"grad_norm": 0.8993576169013977,
"learning_rate": 4.737466399621043e-06,
"loss": 0.5996,
"step": 8385
},
{
"epoch": 0.53,
"grad_norm": 0.9567261934280396,
"learning_rate": 4.736441826759332e-06,
"loss": 0.5523,
"step": 8386
},
{
"epoch": 0.53,
"grad_norm": 0.8996643424034119,
"learning_rate": 4.73541726499514e-06,
"loss": 0.5853,
"step": 8387
},
{
"epoch": 0.53,
"grad_norm": 0.8818598389625549,
"learning_rate": 4.734392714371603e-06,
"loss": 0.6365,
"step": 8388
},
{
"epoch": 0.53,
"grad_norm": 0.8730207681655884,
"learning_rate": 4.733368174931867e-06,
"loss": 0.5728,
"step": 8389
},
{
"epoch": 0.53,
"grad_norm": 0.870194673538208,
"learning_rate": 4.7323436467190705e-06,
"loss": 0.549,
"step": 8390
},
{
"epoch": 0.53,
"grad_norm": 0.9448916912078857,
"learning_rate": 4.7313191297763524e-06,
"loss": 0.5897,
"step": 8391
},
{
"epoch": 0.53,
"grad_norm": 0.859308123588562,
"learning_rate": 4.730294624146849e-06,
"loss": 0.5922,
"step": 8392
},
{
"epoch": 0.53,
"grad_norm": 0.9392966628074646,
"learning_rate": 4.729270129873701e-06,
"loss": 0.5768,
"step": 8393
},
{
"epoch": 0.53,
"grad_norm": 0.919874370098114,
"learning_rate": 4.728245647000047e-06,
"loss": 0.5809,
"step": 8394
},
{
"epoch": 0.53,
"grad_norm": 0.8396472930908203,
"learning_rate": 4.7272211755690245e-06,
"loss": 0.6086,
"step": 8395
},
{
"epoch": 0.53,
"grad_norm": 0.8304100632667542,
"learning_rate": 4.7261967156237676e-06,
"loss": 0.5668,
"step": 8396
},
{
"epoch": 0.53,
"grad_norm": 0.856200098991394,
"learning_rate": 4.725172267207413e-06,
"loss": 0.5228,
"step": 8397
},
{
"epoch": 0.53,
"grad_norm": 0.8551792502403259,
"learning_rate": 4.724147830363101e-06,
"loss": 0.5902,
"step": 8398
},
{
"epoch": 0.53,
"grad_norm": 0.8715303540229797,
"learning_rate": 4.723123405133965e-06,
"loss": 0.6097,
"step": 8399
},
{
"epoch": 0.53,
"grad_norm": 0.8818318843841553,
"learning_rate": 4.722098991563137e-06,
"loss": 0.584,
"step": 8400
},
{
"epoch": 0.53,
"grad_norm": 0.8724188804626465,
"learning_rate": 4.721074589693753e-06,
"loss": 0.5802,
"step": 8401
},
{
"epoch": 0.53,
"grad_norm": 0.8455575108528137,
"learning_rate": 4.72005019956895e-06,
"loss": 0.5751,
"step": 8402
},
{
"epoch": 0.53,
"grad_norm": 0.8873419165611267,
"learning_rate": 4.719025821231859e-06,
"loss": 0.5904,
"step": 8403
},
{
"epoch": 0.53,
"grad_norm": 0.9218294620513916,
"learning_rate": 4.718001454725612e-06,
"loss": 0.5189,
"step": 8404
},
{
"epoch": 0.53,
"grad_norm": 0.9355472326278687,
"learning_rate": 4.716977100093342e-06,
"loss": 0.6187,
"step": 8405
},
{
"epoch": 0.53,
"grad_norm": 0.881987988948822,
"learning_rate": 4.715952757378183e-06,
"loss": 0.5762,
"step": 8406
},
{
"epoch": 0.53,
"grad_norm": 0.9212351441383362,
"learning_rate": 4.714928426623266e-06,
"loss": 0.5961,
"step": 8407
},
{
"epoch": 0.53,
"grad_norm": 0.890076220035553,
"learning_rate": 4.71390410787172e-06,
"loss": 0.6016,
"step": 8408
},
{
"epoch": 0.53,
"grad_norm": 0.9456012845039368,
"learning_rate": 4.712879801166676e-06,
"loss": 0.5956,
"step": 8409
},
{
"epoch": 0.53,
"grad_norm": 0.9153468012809753,
"learning_rate": 4.711855506551267e-06,
"loss": 0.6155,
"step": 8410
},
{
"epoch": 0.53,
"grad_norm": 0.9316279292106628,
"learning_rate": 4.71083122406862e-06,
"loss": 0.5859,
"step": 8411
},
{
"epoch": 0.53,
"grad_norm": 0.879622220993042,
"learning_rate": 4.709806953761863e-06,
"loss": 0.6248,
"step": 8412
},
{
"epoch": 0.53,
"grad_norm": 0.8345575928688049,
"learning_rate": 4.7087826956741266e-06,
"loss": 0.6002,
"step": 8413
},
{
"epoch": 0.53,
"grad_norm": 0.8680174946784973,
"learning_rate": 4.707758449848536e-06,
"loss": 0.5105,
"step": 8414
},
{
"epoch": 0.53,
"grad_norm": 0.9455978870391846,
"learning_rate": 4.7067342163282225e-06,
"loss": 0.571,
"step": 8415
},
{
"epoch": 0.53,
"grad_norm": 0.828173816204071,
"learning_rate": 4.70570999515631e-06,
"loss": 0.5763,
"step": 8416
},
{
"epoch": 0.53,
"grad_norm": 0.9333354234695435,
"learning_rate": 4.704685786375927e-06,
"loss": 0.5924,
"step": 8417
},
{
"epoch": 0.53,
"grad_norm": 0.9066340923309326,
"learning_rate": 4.703661590030196e-06,
"loss": 0.5833,
"step": 8418
},
{
"epoch": 0.53,
"grad_norm": 0.8967267274856567,
"learning_rate": 4.702637406162247e-06,
"loss": 0.6445,
"step": 8419
},
{
"epoch": 0.53,
"grad_norm": 0.8336849808692932,
"learning_rate": 4.7016132348152e-06,
"loss": 0.5238,
"step": 8420
},
{
"epoch": 0.53,
"grad_norm": 1.0905916690826416,
"learning_rate": 4.700589076032184e-06,
"loss": 0.5929,
"step": 8421
},
{
"epoch": 0.53,
"grad_norm": 0.8906887173652649,
"learning_rate": 4.699564929856318e-06,
"loss": 0.6375,
"step": 8422
},
{
"epoch": 0.53,
"grad_norm": 0.8552356362342834,
"learning_rate": 4.698540796330729e-06,
"loss": 0.5987,
"step": 8423
},
{
"epoch": 0.53,
"grad_norm": 0.8900651931762695,
"learning_rate": 4.697516675498538e-06,
"loss": 0.5935,
"step": 8424
},
{
"epoch": 0.53,
"grad_norm": 0.9135156869888306,
"learning_rate": 4.69649256740287e-06,
"loss": 0.5729,
"step": 8425
},
{
"epoch": 0.53,
"grad_norm": 0.9399777054786682,
"learning_rate": 4.695468472086841e-06,
"loss": 0.6642,
"step": 8426
},
{
"epoch": 0.53,
"grad_norm": 0.9039340019226074,
"learning_rate": 4.694444389593576e-06,
"loss": 0.5794,
"step": 8427
},
{
"epoch": 0.53,
"grad_norm": 0.8976691961288452,
"learning_rate": 4.693420319966195e-06,
"loss": 0.6221,
"step": 8428
},
{
"epoch": 0.53,
"grad_norm": 0.8583334684371948,
"learning_rate": 4.692396263247818e-06,
"loss": 0.6189,
"step": 8429
},
{
"epoch": 0.53,
"grad_norm": 0.8833329677581787,
"learning_rate": 4.691372219481564e-06,
"loss": 0.5687,
"step": 8430
},
{
"epoch": 0.53,
"grad_norm": 0.8784264326095581,
"learning_rate": 4.690348188710552e-06,
"loss": 0.6358,
"step": 8431
},
{
"epoch": 0.53,
"grad_norm": 0.8706404566764832,
"learning_rate": 4.689324170977901e-06,
"loss": 0.5894,
"step": 8432
},
{
"epoch": 0.53,
"grad_norm": 0.82457035779953,
"learning_rate": 4.688300166326729e-06,
"loss": 0.5753,
"step": 8433
},
{
"epoch": 0.53,
"grad_norm": 0.8955191969871521,
"learning_rate": 4.6872761748001515e-06,
"loss": 0.5895,
"step": 8434
},
{
"epoch": 0.53,
"grad_norm": 0.8949235081672668,
"learning_rate": 4.6862521964412865e-06,
"loss": 0.6284,
"step": 8435
},
{
"epoch": 0.53,
"grad_norm": 0.9302405714988708,
"learning_rate": 4.6852282312932505e-06,
"loss": 0.6205,
"step": 8436
},
{
"epoch": 0.53,
"grad_norm": 0.9242597818374634,
"learning_rate": 4.684204279399159e-06,
"loss": 0.6397,
"step": 8437
},
{
"epoch": 0.53,
"grad_norm": 0.907974898815155,
"learning_rate": 4.683180340802126e-06,
"loss": 0.6082,
"step": 8438
},
{
"epoch": 0.53,
"grad_norm": 0.8790863752365112,
"learning_rate": 4.682156415545266e-06,
"loss": 0.5588,
"step": 8439
},
{
"epoch": 0.53,
"grad_norm": 0.8933101892471313,
"learning_rate": 4.681132503671696e-06,
"loss": 0.5786,
"step": 8440
},
{
"epoch": 0.53,
"grad_norm": 0.9507586359977722,
"learning_rate": 4.680108605224526e-06,
"loss": 0.5959,
"step": 8441
},
{
"epoch": 0.53,
"grad_norm": 0.8982723355293274,
"learning_rate": 4.679084720246869e-06,
"loss": 0.587,
"step": 8442
},
{
"epoch": 0.53,
"grad_norm": 0.8750470280647278,
"learning_rate": 4.67806084878184e-06,
"loss": 0.5799,
"step": 8443
},
{
"epoch": 0.53,
"grad_norm": 0.859678328037262,
"learning_rate": 4.677036990872546e-06,
"loss": 0.565,
"step": 8444
},
{
"epoch": 0.54,
"grad_norm": 0.8528252243995667,
"learning_rate": 4.676013146562103e-06,
"loss": 0.5786,
"step": 8445
},
{
"epoch": 0.54,
"grad_norm": 1.003036379814148,
"learning_rate": 4.674989315893618e-06,
"loss": 0.6294,
"step": 8446
},
{
"epoch": 0.54,
"grad_norm": 0.849640429019928,
"learning_rate": 4.6739654989102034e-06,
"loss": 0.5834,
"step": 8447
},
{
"epoch": 0.54,
"grad_norm": 0.9035535454750061,
"learning_rate": 4.672941695654965e-06,
"loss": 0.5989,
"step": 8448
},
{
"epoch": 0.54,
"grad_norm": 0.8059716820716858,
"learning_rate": 4.6719179061710164e-06,
"loss": 0.5307,
"step": 8449
},
{
"epoch": 0.54,
"grad_norm": 0.8504626154899597,
"learning_rate": 4.670894130501462e-06,
"loss": 0.5043,
"step": 8450
},
{
"epoch": 0.54,
"grad_norm": 0.9729040861129761,
"learning_rate": 4.669870368689414e-06,
"loss": 0.5991,
"step": 8451
},
{
"epoch": 0.54,
"grad_norm": 0.9525192975997925,
"learning_rate": 4.668846620777972e-06,
"loss": 0.6014,
"step": 8452
},
{
"epoch": 0.54,
"grad_norm": 0.8673512935638428,
"learning_rate": 4.6678228868102495e-06,
"loss": 0.6118,
"step": 8453
},
{
"epoch": 0.54,
"grad_norm": 0.8499407172203064,
"learning_rate": 4.666799166829349e-06,
"loss": 0.5924,
"step": 8454
},
{
"epoch": 0.54,
"grad_norm": 0.930546224117279,
"learning_rate": 4.665775460878377e-06,
"loss": 0.5947,
"step": 8455
},
{
"epoch": 0.54,
"grad_norm": 0.8475348949432373,
"learning_rate": 4.664751769000436e-06,
"loss": 0.5222,
"step": 8456
},
{
"epoch": 0.54,
"grad_norm": 0.9042877554893494,
"learning_rate": 4.663728091238634e-06,
"loss": 0.58,
"step": 8457
},
{
"epoch": 0.54,
"grad_norm": 0.8196518421173096,
"learning_rate": 4.662704427636071e-06,
"loss": 0.5491,
"step": 8458
},
{
"epoch": 0.54,
"grad_norm": 0.9162909984588623,
"learning_rate": 4.661680778235852e-06,
"loss": 0.5964,
"step": 8459
},
{
"epoch": 0.54,
"grad_norm": 0.907317042350769,
"learning_rate": 4.660657143081079e-06,
"loss": 0.6112,
"step": 8460
},
{
"epoch": 0.54,
"grad_norm": 0.9425126314163208,
"learning_rate": 4.65963352221485e-06,
"loss": 0.591,
"step": 8461
},
{
"epoch": 0.54,
"grad_norm": 0.8683360815048218,
"learning_rate": 4.658609915680272e-06,
"loss": 0.5176,
"step": 8462
},
{
"epoch": 0.54,
"grad_norm": 0.8283640742301941,
"learning_rate": 4.657586323520443e-06,
"loss": 0.5724,
"step": 8463
},
{
"epoch": 0.54,
"grad_norm": 0.873866617679596,
"learning_rate": 4.6565627457784625e-06,
"loss": 0.5928,
"step": 8464
},
{
"epoch": 0.54,
"grad_norm": 0.8793148398399353,
"learning_rate": 4.655539182497428e-06,
"loss": 0.5796,
"step": 8465
},
{
"epoch": 0.54,
"grad_norm": 0.8088488578796387,
"learning_rate": 4.654515633720442e-06,
"loss": 0.4934,
"step": 8466
},
{
"epoch": 0.54,
"grad_norm": 0.8243443369865417,
"learning_rate": 4.653492099490601e-06,
"loss": 0.5183,
"step": 8467
},
{
"epoch": 0.54,
"grad_norm": 0.9187846779823303,
"learning_rate": 4.6524685798510025e-06,
"loss": 0.6225,
"step": 8468
},
{
"epoch": 0.54,
"grad_norm": 0.8890441060066223,
"learning_rate": 4.651445074844742e-06,
"loss": 0.5769,
"step": 8469
},
{
"epoch": 0.54,
"grad_norm": 0.9298631548881531,
"learning_rate": 4.650421584514917e-06,
"loss": 0.5943,
"step": 8470
},
{
"epoch": 0.54,
"grad_norm": 0.8094522356987,
"learning_rate": 4.649398108904624e-06,
"loss": 0.5371,
"step": 8471
},
{
"epoch": 0.54,
"grad_norm": 0.8985278606414795,
"learning_rate": 4.648374648056957e-06,
"loss": 0.5962,
"step": 8472
},
{
"epoch": 0.54,
"grad_norm": 0.870664656162262,
"learning_rate": 4.64735120201501e-06,
"loss": 0.5929,
"step": 8473
},
{
"epoch": 0.54,
"grad_norm": 0.8985655307769775,
"learning_rate": 4.646327770821875e-06,
"loss": 0.6354,
"step": 8474
},
{
"epoch": 0.54,
"grad_norm": 0.8748487830162048,
"learning_rate": 4.64530435452065e-06,
"loss": 0.5795,
"step": 8475
},
{
"epoch": 0.54,
"grad_norm": 0.8997553586959839,
"learning_rate": 4.644280953154424e-06,
"loss": 0.6348,
"step": 8476
},
{
"epoch": 0.54,
"grad_norm": 0.9536176323890686,
"learning_rate": 4.643257566766289e-06,
"loss": 0.5866,
"step": 8477
},
{
"epoch": 0.54,
"grad_norm": 0.8656853437423706,
"learning_rate": 4.642234195399336e-06,
"loss": 0.5839,
"step": 8478
},
{
"epoch": 0.54,
"grad_norm": 0.8885663151741028,
"learning_rate": 4.641210839096659e-06,
"loss": 0.5734,
"step": 8479
},
{
"epoch": 0.54,
"grad_norm": 0.9137561917304993,
"learning_rate": 4.6401874979013455e-06,
"loss": 0.5855,
"step": 8480
},
{
"epoch": 0.54,
"grad_norm": 0.8827475905418396,
"learning_rate": 4.639164171856483e-06,
"loss": 0.6211,
"step": 8481
},
{
"epoch": 0.54,
"grad_norm": 0.9084077477455139,
"learning_rate": 4.6381408610051605e-06,
"loss": 0.5965,
"step": 8482
},
{
"epoch": 0.54,
"grad_norm": 0.9235100746154785,
"learning_rate": 4.63711756539047e-06,
"loss": 0.6093,
"step": 8483
},
{
"epoch": 0.54,
"grad_norm": 0.8328654170036316,
"learning_rate": 4.636094285055497e-06,
"loss": 0.5547,
"step": 8484
},
{
"epoch": 0.54,
"grad_norm": 0.8300716280937195,
"learning_rate": 4.635071020043326e-06,
"loss": 0.534,
"step": 8485
},
{
"epoch": 0.54,
"grad_norm": 0.914543628692627,
"learning_rate": 4.634047770397044e-06,
"loss": 0.5686,
"step": 8486
},
{
"epoch": 0.54,
"grad_norm": 0.9101009964942932,
"learning_rate": 4.633024536159739e-06,
"loss": 0.5694,
"step": 8487
},
{
"epoch": 0.54,
"grad_norm": 0.8731689453125,
"learning_rate": 4.632001317374495e-06,
"loss": 0.5888,
"step": 8488
},
{
"epoch": 0.54,
"grad_norm": 0.8360764384269714,
"learning_rate": 4.630978114084394e-06,
"loss": 0.5559,
"step": 8489
},
{
"epoch": 0.54,
"grad_norm": 0.956150233745575,
"learning_rate": 4.629954926332522e-06,
"loss": 0.6186,
"step": 8490
},
{
"epoch": 0.54,
"grad_norm": 0.9069817066192627,
"learning_rate": 4.628931754161959e-06,
"loss": 0.5812,
"step": 8491
},
{
"epoch": 0.54,
"grad_norm": 0.8588123917579651,
"learning_rate": 4.62790859761579e-06,
"loss": 0.5806,
"step": 8492
},
{
"epoch": 0.54,
"grad_norm": 0.9087151288986206,
"learning_rate": 4.626885456737095e-06,
"loss": 0.6061,
"step": 8493
},
{
"epoch": 0.54,
"grad_norm": 0.9100707173347473,
"learning_rate": 4.625862331568957e-06,
"loss": 0.5807,
"step": 8494
},
{
"epoch": 0.54,
"grad_norm": 0.9260814785957336,
"learning_rate": 4.624839222154453e-06,
"loss": 0.5917,
"step": 8495
},
{
"epoch": 0.54,
"grad_norm": 0.9124268293380737,
"learning_rate": 4.623816128536665e-06,
"loss": 0.5771,
"step": 8496
},
{
"epoch": 0.54,
"grad_norm": 0.9149200320243835,
"learning_rate": 4.6227930507586705e-06,
"loss": 0.5622,
"step": 8497
},
{
"epoch": 0.54,
"grad_norm": 0.9365261197090149,
"learning_rate": 4.62176998886355e-06,
"loss": 0.6093,
"step": 8498
},
{
"epoch": 0.54,
"grad_norm": 0.8503932952880859,
"learning_rate": 4.620746942894377e-06,
"loss": 0.6117,
"step": 8499
},
{
"epoch": 0.54,
"grad_norm": 0.8979615569114685,
"learning_rate": 4.619723912894232e-06,
"loss": 0.5852,
"step": 8500
},
{
"epoch": 0.54,
"grad_norm": 0.8997284770011902,
"learning_rate": 4.618700898906191e-06,
"loss": 0.5506,
"step": 8501
},
{
"epoch": 0.54,
"grad_norm": 0.8395345211029053,
"learning_rate": 4.6176779009733295e-06,
"loss": 0.5371,
"step": 8502
},
{
"epoch": 0.54,
"grad_norm": 0.8006191253662109,
"learning_rate": 4.616654919138719e-06,
"loss": 0.5285,
"step": 8503
},
{
"epoch": 0.54,
"grad_norm": 0.9190979599952698,
"learning_rate": 4.6156319534454365e-06,
"loss": 0.6529,
"step": 8504
},
{
"epoch": 0.54,
"grad_norm": 0.875033438205719,
"learning_rate": 4.614609003936558e-06,
"loss": 0.5774,
"step": 8505
},
{
"epoch": 0.54,
"grad_norm": 0.8436771035194397,
"learning_rate": 4.613586070655152e-06,
"loss": 0.5751,
"step": 8506
},
{
"epoch": 0.54,
"grad_norm": 0.8874161243438721,
"learning_rate": 4.612563153644292e-06,
"loss": 0.581,
"step": 8507
},
{
"epoch": 0.54,
"grad_norm": 0.8197293281555176,
"learning_rate": 4.6115402529470495e-06,
"loss": 0.6048,
"step": 8508
},
{
"epoch": 0.54,
"grad_norm": 0.901355504989624,
"learning_rate": 4.610517368606497e-06,
"loss": 0.5648,
"step": 8509
},
{
"epoch": 0.54,
"grad_norm": 0.8736656308174133,
"learning_rate": 4.609494500665703e-06,
"loss": 0.5775,
"step": 8510
},
{
"epoch": 0.54,
"grad_norm": 0.9123381972312927,
"learning_rate": 4.608471649167737e-06,
"loss": 0.5824,
"step": 8511
},
{
"epoch": 0.54,
"grad_norm": 0.895682156085968,
"learning_rate": 4.6074488141556656e-06,
"loss": 0.6338,
"step": 8512
},
{
"epoch": 0.54,
"grad_norm": 0.8595967292785645,
"learning_rate": 4.606425995672562e-06,
"loss": 0.5794,
"step": 8513
},
{
"epoch": 0.54,
"grad_norm": 0.8449206948280334,
"learning_rate": 4.605403193761489e-06,
"loss": 0.5957,
"step": 8514
},
{
"epoch": 0.54,
"grad_norm": 0.8243349194526672,
"learning_rate": 4.604380408465516e-06,
"loss": 0.55,
"step": 8515
},
{
"epoch": 0.54,
"grad_norm": 0.8854864239692688,
"learning_rate": 4.603357639827705e-06,
"loss": 0.6184,
"step": 8516
},
{
"epoch": 0.54,
"grad_norm": 0.9017980098724365,
"learning_rate": 4.602334887891127e-06,
"loss": 0.6258,
"step": 8517
},
{
"epoch": 0.54,
"grad_norm": 0.9609394669532776,
"learning_rate": 4.601312152698843e-06,
"loss": 0.6221,
"step": 8518
},
{
"epoch": 0.54,
"grad_norm": 0.9184016585350037,
"learning_rate": 4.600289434293917e-06,
"loss": 0.5955,
"step": 8519
},
{
"epoch": 0.54,
"grad_norm": 0.9105634093284607,
"learning_rate": 4.599266732719413e-06,
"loss": 0.5936,
"step": 8520
},
{
"epoch": 0.54,
"grad_norm": 0.8601149320602417,
"learning_rate": 4.598244048018391e-06,
"loss": 0.5765,
"step": 8521
},
{
"epoch": 0.54,
"grad_norm": 0.8680559396743774,
"learning_rate": 4.5972213802339165e-06,
"loss": 0.6048,
"step": 8522
},
{
"epoch": 0.54,
"grad_norm": 0.8764021396636963,
"learning_rate": 4.596198729409047e-06,
"loss": 0.6259,
"step": 8523
},
{
"epoch": 0.54,
"grad_norm": 1.0313016176223755,
"learning_rate": 4.5951760955868455e-06,
"loss": 0.5857,
"step": 8524
},
{
"epoch": 0.54,
"grad_norm": 0.9298897385597229,
"learning_rate": 4.594153478810368e-06,
"loss": 0.603,
"step": 8525
},
{
"epoch": 0.54,
"grad_norm": 0.904453456401825,
"learning_rate": 4.593130879122678e-06,
"loss": 0.5608,
"step": 8526
},
{
"epoch": 0.54,
"grad_norm": 0.9432054162025452,
"learning_rate": 4.59210829656683e-06,
"loss": 0.5982,
"step": 8527
},
{
"epoch": 0.54,
"grad_norm": 0.8348836302757263,
"learning_rate": 4.591085731185885e-06,
"loss": 0.5458,
"step": 8528
},
{
"epoch": 0.54,
"grad_norm": 0.9127042293548584,
"learning_rate": 4.590063183022894e-06,
"loss": 0.5765,
"step": 8529
},
{
"epoch": 0.54,
"grad_norm": 0.8295519351959229,
"learning_rate": 4.589040652120919e-06,
"loss": 0.5914,
"step": 8530
},
{
"epoch": 0.54,
"grad_norm": 0.9889672994613647,
"learning_rate": 4.588018138523011e-06,
"loss": 0.5926,
"step": 8531
},
{
"epoch": 0.54,
"grad_norm": 0.8858618140220642,
"learning_rate": 4.5869956422722274e-06,
"loss": 0.5637,
"step": 8532
},
{
"epoch": 0.54,
"grad_norm": 0.8900063633918762,
"learning_rate": 4.585973163411618e-06,
"loss": 0.5885,
"step": 8533
},
{
"epoch": 0.54,
"grad_norm": 0.8373422026634216,
"learning_rate": 4.584950701984241e-06,
"loss": 0.56,
"step": 8534
},
{
"epoch": 0.54,
"grad_norm": 0.8420644998550415,
"learning_rate": 4.583928258033145e-06,
"loss": 0.5761,
"step": 8535
},
{
"epoch": 0.54,
"grad_norm": 0.8812116980552673,
"learning_rate": 4.5829058316013835e-06,
"loss": 0.5584,
"step": 8536
},
{
"epoch": 0.54,
"grad_norm": 0.967736542224884,
"learning_rate": 4.581883422732007e-06,
"loss": 0.615,
"step": 8537
},
{
"epoch": 0.54,
"grad_norm": 0.8752156496047974,
"learning_rate": 4.580861031468062e-06,
"loss": 0.5622,
"step": 8538
},
{
"epoch": 0.54,
"grad_norm": 0.845308780670166,
"learning_rate": 4.579838657852603e-06,
"loss": 0.5925,
"step": 8539
},
{
"epoch": 0.54,
"grad_norm": 0.8537322282791138,
"learning_rate": 4.578816301928677e-06,
"loss": 0.5804,
"step": 8540
},
{
"epoch": 0.54,
"grad_norm": 0.8931176066398621,
"learning_rate": 4.577793963739331e-06,
"loss": 0.5816,
"step": 8541
},
{
"epoch": 0.54,
"grad_norm": 0.855497419834137,
"learning_rate": 4.576771643327611e-06,
"loss": 0.5514,
"step": 8542
},
{
"epoch": 0.54,
"grad_norm": 0.8563072681427002,
"learning_rate": 4.575749340736565e-06,
"loss": 0.5706,
"step": 8543
},
{
"epoch": 0.54,
"grad_norm": 0.8625338673591614,
"learning_rate": 4.57472705600924e-06,
"loss": 0.5584,
"step": 8544
},
{
"epoch": 0.54,
"grad_norm": 0.9388693571090698,
"learning_rate": 4.573704789188679e-06,
"loss": 0.6424,
"step": 8545
},
{
"epoch": 0.54,
"grad_norm": 0.8577854633331299,
"learning_rate": 4.5726825403179245e-06,
"loss": 0.6327,
"step": 8546
},
{
"epoch": 0.54,
"grad_norm": 0.8097984194755554,
"learning_rate": 4.571660309440022e-06,
"loss": 0.5456,
"step": 8547
},
{
"epoch": 0.54,
"grad_norm": 0.9322377443313599,
"learning_rate": 4.570638096598016e-06,
"loss": 0.6238,
"step": 8548
},
{
"epoch": 0.54,
"grad_norm": 0.9196782112121582,
"learning_rate": 4.569615901834946e-06,
"loss": 0.578,
"step": 8549
},
{
"epoch": 0.54,
"grad_norm": 0.9435470700263977,
"learning_rate": 4.568593725193852e-06,
"loss": 0.5887,
"step": 8550
},
{
"epoch": 0.54,
"grad_norm": 0.8405277132987976,
"learning_rate": 4.567571566717774e-06,
"loss": 0.5792,
"step": 8551
},
{
"epoch": 0.54,
"grad_norm": 0.8100456595420837,
"learning_rate": 4.566549426449755e-06,
"loss": 0.5389,
"step": 8552
},
{
"epoch": 0.54,
"grad_norm": 0.8953537940979004,
"learning_rate": 4.565527304432833e-06,
"loss": 0.5842,
"step": 8553
},
{
"epoch": 0.54,
"grad_norm": 0.8631918430328369,
"learning_rate": 4.564505200710042e-06,
"loss": 0.5341,
"step": 8554
},
{
"epoch": 0.54,
"grad_norm": 0.8625524640083313,
"learning_rate": 4.5634831153244215e-06,
"loss": 0.5662,
"step": 8555
},
{
"epoch": 0.54,
"grad_norm": 0.8663583993911743,
"learning_rate": 4.562461048319011e-06,
"loss": 0.563,
"step": 8556
},
{
"epoch": 0.54,
"grad_norm": 0.9523765444755554,
"learning_rate": 4.561438999736844e-06,
"loss": 0.6671,
"step": 8557
},
{
"epoch": 0.54,
"grad_norm": 0.9273942708969116,
"learning_rate": 4.5604169696209535e-06,
"loss": 0.6043,
"step": 8558
},
{
"epoch": 0.54,
"grad_norm": 0.8723426461219788,
"learning_rate": 4.559394958014375e-06,
"loss": 0.6176,
"step": 8559
},
{
"epoch": 0.54,
"grad_norm": 0.9040724635124207,
"learning_rate": 4.558372964960142e-06,
"loss": 0.594,
"step": 8560
},
{
"epoch": 0.54,
"grad_norm": 0.8431350588798523,
"learning_rate": 4.557350990501288e-06,
"loss": 0.5618,
"step": 8561
},
{
"epoch": 0.54,
"grad_norm": 0.8898385763168335,
"learning_rate": 4.556329034680845e-06,
"loss": 0.5719,
"step": 8562
},
{
"epoch": 0.54,
"grad_norm": 0.9010729789733887,
"learning_rate": 4.55530709754184e-06,
"loss": 0.6032,
"step": 8563
},
{
"epoch": 0.54,
"grad_norm": 0.9148017764091492,
"learning_rate": 4.5542851791273085e-06,
"loss": 0.6184,
"step": 8564
},
{
"epoch": 0.54,
"grad_norm": 0.8323689103126526,
"learning_rate": 4.5532632794802766e-06,
"loss": 0.5297,
"step": 8565
},
{
"epoch": 0.54,
"grad_norm": 0.8298326134681702,
"learning_rate": 4.5522413986437745e-06,
"loss": 0.5933,
"step": 8566
},
{
"epoch": 0.54,
"grad_norm": 0.9563860893249512,
"learning_rate": 4.55121953666083e-06,
"loss": 0.5663,
"step": 8567
},
{
"epoch": 0.54,
"grad_norm": 0.8822575211524963,
"learning_rate": 4.550197693574468e-06,
"loss": 0.5713,
"step": 8568
},
{
"epoch": 0.54,
"grad_norm": 0.9293971657752991,
"learning_rate": 4.549175869427717e-06,
"loss": 0.5852,
"step": 8569
},
{
"epoch": 0.54,
"grad_norm": 0.846694827079773,
"learning_rate": 4.548154064263603e-06,
"loss": 0.558,
"step": 8570
},
{
"epoch": 0.54,
"grad_norm": 1.0178662538528442,
"learning_rate": 4.547132278125149e-06,
"loss": 0.5941,
"step": 8571
},
{
"epoch": 0.54,
"grad_norm": 0.9423683881759644,
"learning_rate": 4.546110511055377e-06,
"loss": 0.5525,
"step": 8572
},
{
"epoch": 0.54,
"grad_norm": 0.8660984039306641,
"learning_rate": 4.545088763097314e-06,
"loss": 0.5657,
"step": 8573
},
{
"epoch": 0.54,
"grad_norm": 0.8998304605484009,
"learning_rate": 4.544067034293982e-06,
"loss": 0.5886,
"step": 8574
},
{
"epoch": 0.54,
"grad_norm": 0.8911488056182861,
"learning_rate": 4.543045324688401e-06,
"loss": 0.55,
"step": 8575
},
{
"epoch": 0.54,
"grad_norm": 0.8904201984405518,
"learning_rate": 4.542023634323589e-06,
"loss": 0.5812,
"step": 8576
},
{
"epoch": 0.54,
"grad_norm": 0.8831244111061096,
"learning_rate": 4.54100196324257e-06,
"loss": 0.5895,
"step": 8577
},
{
"epoch": 0.54,
"grad_norm": 0.877057671546936,
"learning_rate": 4.539980311488363e-06,
"loss": 0.5412,
"step": 8578
},
{
"epoch": 0.54,
"grad_norm": 0.8660837411880493,
"learning_rate": 4.538958679103984e-06,
"loss": 0.5673,
"step": 8579
},
{
"epoch": 0.54,
"grad_norm": 0.8540948033332825,
"learning_rate": 4.5379370661324495e-06,
"loss": 0.5545,
"step": 8580
},
{
"epoch": 0.54,
"grad_norm": 0.8788366317749023,
"learning_rate": 4.536915472616779e-06,
"loss": 0.577,
"step": 8581
},
{
"epoch": 0.54,
"grad_norm": 0.8697229623794556,
"learning_rate": 4.535893898599988e-06,
"loss": 0.5517,
"step": 8582
},
{
"epoch": 0.54,
"grad_norm": 0.9252592921257019,
"learning_rate": 4.53487234412509e-06,
"loss": 0.5577,
"step": 8583
},
{
"epoch": 0.54,
"grad_norm": 0.8896933197975159,
"learning_rate": 4.533850809235099e-06,
"loss": 0.6227,
"step": 8584
},
{
"epoch": 0.54,
"grad_norm": 0.9034548401832581,
"learning_rate": 4.532829293973028e-06,
"loss": 0.6235,
"step": 8585
},
{
"epoch": 0.54,
"grad_norm": 0.9039227962493896,
"learning_rate": 4.531807798381892e-06,
"loss": 0.5804,
"step": 8586
},
{
"epoch": 0.54,
"grad_norm": 0.8506115674972534,
"learning_rate": 4.5307863225047e-06,
"loss": 0.5546,
"step": 8587
},
{
"epoch": 0.54,
"grad_norm": 0.9655522108078003,
"learning_rate": 4.529764866384464e-06,
"loss": 0.6156,
"step": 8588
},
{
"epoch": 0.54,
"grad_norm": 0.8476807475090027,
"learning_rate": 4.528743430064192e-06,
"loss": 0.5422,
"step": 8589
},
{
"epoch": 0.54,
"grad_norm": 0.9019517302513123,
"learning_rate": 4.527722013586897e-06,
"loss": 0.5895,
"step": 8590
},
{
"epoch": 0.54,
"grad_norm": 0.9369930624961853,
"learning_rate": 4.5267006169955855e-06,
"loss": 0.6462,
"step": 8591
},
{
"epoch": 0.54,
"grad_norm": 0.9092143774032593,
"learning_rate": 4.525679240333262e-06,
"loss": 0.5999,
"step": 8592
},
{
"epoch": 0.54,
"grad_norm": 0.8883408904075623,
"learning_rate": 4.524657883642936e-06,
"loss": 0.629,
"step": 8593
},
{
"epoch": 0.54,
"grad_norm": 0.8866313099861145,
"learning_rate": 4.5236365469676144e-06,
"loss": 0.5621,
"step": 8594
},
{
"epoch": 0.54,
"grad_norm": 0.8671311736106873,
"learning_rate": 4.522615230350302e-06,
"loss": 0.633,
"step": 8595
},
{
"epoch": 0.54,
"grad_norm": 0.8295920491218567,
"learning_rate": 4.521593933833998e-06,
"loss": 0.5545,
"step": 8596
},
{
"epoch": 0.54,
"grad_norm": 0.8872106671333313,
"learning_rate": 4.520572657461712e-06,
"loss": 0.5561,
"step": 8597
},
{
"epoch": 0.54,
"grad_norm": 0.9319486021995544,
"learning_rate": 4.519551401276441e-06,
"loss": 0.5819,
"step": 8598
},
{
"epoch": 0.54,
"grad_norm": 0.8339968323707581,
"learning_rate": 4.518530165321192e-06,
"loss": 0.5802,
"step": 8599
},
{
"epoch": 0.54,
"grad_norm": 0.910203218460083,
"learning_rate": 4.517508949638961e-06,
"loss": 0.5946,
"step": 8600
},
{
"epoch": 0.54,
"grad_norm": 0.8642169833183289,
"learning_rate": 4.516487754272751e-06,
"loss": 0.6044,
"step": 8601
},
{
"epoch": 0.54,
"grad_norm": 0.8620786070823669,
"learning_rate": 4.515466579265557e-06,
"loss": 0.5943,
"step": 8602
},
{
"epoch": 0.55,
"grad_norm": 0.865822970867157,
"learning_rate": 4.5144454246603816e-06,
"loss": 0.5797,
"step": 8603
},
{
"epoch": 0.55,
"grad_norm": 0.9090112447738647,
"learning_rate": 4.51342429050022e-06,
"loss": 0.5848,
"step": 8604
},
{
"epoch": 0.55,
"grad_norm": 0.8540524244308472,
"learning_rate": 4.51240317682807e-06,
"loss": 0.6534,
"step": 8605
},
{
"epoch": 0.55,
"grad_norm": 0.8696991801261902,
"learning_rate": 4.5113820836869234e-06,
"loss": 0.5707,
"step": 8606
},
{
"epoch": 0.55,
"grad_norm": 0.9889391660690308,
"learning_rate": 4.51036101111978e-06,
"loss": 0.6631,
"step": 8607
},
{
"epoch": 0.55,
"grad_norm": 0.8236830234527588,
"learning_rate": 4.509339959169629e-06,
"loss": 0.55,
"step": 8608
},
{
"epoch": 0.55,
"grad_norm": 0.9895977973937988,
"learning_rate": 4.508318927879468e-06,
"loss": 0.6829,
"step": 8609
},
{
"epoch": 0.55,
"grad_norm": 0.8496975302696228,
"learning_rate": 4.507297917292284e-06,
"loss": 0.537,
"step": 8610
},
{
"epoch": 0.55,
"grad_norm": 0.8107864260673523,
"learning_rate": 4.506276927451072e-06,
"loss": 0.5522,
"step": 8611
},
{
"epoch": 0.55,
"grad_norm": 0.8916783928871155,
"learning_rate": 4.505255958398821e-06,
"loss": 0.6091,
"step": 8612
},
{
"epoch": 0.55,
"grad_norm": 0.9526239037513733,
"learning_rate": 4.504235010178521e-06,
"loss": 0.5811,
"step": 8613
},
{
"epoch": 0.55,
"grad_norm": 0.9013256430625916,
"learning_rate": 4.503214082833161e-06,
"loss": 0.5874,
"step": 8614
},
{
"epoch": 0.55,
"grad_norm": 0.8835528492927551,
"learning_rate": 4.502193176405724e-06,
"loss": 0.5748,
"step": 8615
},
{
"epoch": 0.55,
"grad_norm": 0.8880060315132141,
"learning_rate": 4.501172290939203e-06,
"loss": 0.559,
"step": 8616
},
{
"epoch": 0.55,
"grad_norm": 0.8688036799430847,
"learning_rate": 4.5001514264765826e-06,
"loss": 0.5764,
"step": 8617
},
{
"epoch": 0.55,
"grad_norm": 0.8470838069915771,
"learning_rate": 4.499130583060845e-06,
"loss": 0.558,
"step": 8618
},
{
"epoch": 0.55,
"grad_norm": 0.8374934196472168,
"learning_rate": 4.4981097607349764e-06,
"loss": 0.5753,
"step": 8619
},
{
"epoch": 0.55,
"grad_norm": 0.8633030652999878,
"learning_rate": 4.49708895954196e-06,
"loss": 0.5417,
"step": 8620
},
{
"epoch": 0.55,
"grad_norm": 0.9553747177124023,
"learning_rate": 4.496068179524778e-06,
"loss": 0.6072,
"step": 8621
},
{
"epoch": 0.55,
"grad_norm": 0.9089109301567078,
"learning_rate": 4.495047420726412e-06,
"loss": 0.6185,
"step": 8622
},
{
"epoch": 0.55,
"grad_norm": 0.881161630153656,
"learning_rate": 4.494026683189843e-06,
"loss": 0.6265,
"step": 8623
},
{
"epoch": 0.55,
"grad_norm": 0.943405032157898,
"learning_rate": 4.493005966958049e-06,
"loss": 0.5904,
"step": 8624
},
{
"epoch": 0.55,
"grad_norm": 0.8863667845726013,
"learning_rate": 4.4919852720740115e-06,
"loss": 0.5585,
"step": 8625
},
{
"epoch": 0.55,
"grad_norm": 0.9154953956604004,
"learning_rate": 4.490964598580706e-06,
"loss": 0.6232,
"step": 8626
},
{
"epoch": 0.55,
"grad_norm": 0.885014533996582,
"learning_rate": 4.489943946521111e-06,
"loss": 0.5734,
"step": 8627
},
{
"epoch": 0.55,
"grad_norm": 0.9125478267669678,
"learning_rate": 4.4889233159382e-06,
"loss": 0.6038,
"step": 8628
},
{
"epoch": 0.55,
"grad_norm": 0.9338325262069702,
"learning_rate": 4.487902706874954e-06,
"loss": 0.6203,
"step": 8629
},
{
"epoch": 0.55,
"grad_norm": 0.98078852891922,
"learning_rate": 4.486882119374341e-06,
"loss": 0.6177,
"step": 8630
},
{
"epoch": 0.55,
"grad_norm": 0.9384462833404541,
"learning_rate": 4.485861553479338e-06,
"loss": 0.6567,
"step": 8631
},
{
"epoch": 0.55,
"grad_norm": 0.9040749669075012,
"learning_rate": 4.484841009232914e-06,
"loss": 0.6106,
"step": 8632
},
{
"epoch": 0.55,
"grad_norm": 0.8856215476989746,
"learning_rate": 4.483820486678047e-06,
"loss": 0.5848,
"step": 8633
},
{
"epoch": 0.55,
"grad_norm": 0.9175398349761963,
"learning_rate": 4.482799985857701e-06,
"loss": 0.6391,
"step": 8634
},
{
"epoch": 0.55,
"grad_norm": 0.8846474885940552,
"learning_rate": 4.48177950681485e-06,
"loss": 0.568,
"step": 8635
},
{
"epoch": 0.55,
"grad_norm": 0.8936927318572998,
"learning_rate": 4.480759049592458e-06,
"loss": 0.6202,
"step": 8636
},
{
"epoch": 0.55,
"grad_norm": 0.8895360231399536,
"learning_rate": 4.4797386142335e-06,
"loss": 0.5881,
"step": 8637
},
{
"epoch": 0.55,
"grad_norm": 0.8970397710800171,
"learning_rate": 4.478718200780936e-06,
"loss": 0.622,
"step": 8638
},
{
"epoch": 0.55,
"grad_norm": 0.8260961771011353,
"learning_rate": 4.477697809277738e-06,
"loss": 0.5248,
"step": 8639
},
{
"epoch": 0.55,
"grad_norm": 0.8553221821784973,
"learning_rate": 4.476677439766865e-06,
"loss": 0.5772,
"step": 8640
},
{
"epoch": 0.55,
"grad_norm": 0.9180177450180054,
"learning_rate": 4.475657092291287e-06,
"loss": 0.6119,
"step": 8641
},
{
"epoch": 0.55,
"grad_norm": 0.9326549172401428,
"learning_rate": 4.4746367668939646e-06,
"loss": 0.6103,
"step": 8642
},
{
"epoch": 0.55,
"grad_norm": 0.9045451879501343,
"learning_rate": 4.4736164636178605e-06,
"loss": 0.5519,
"step": 8643
},
{
"epoch": 0.55,
"grad_norm": 0.8994148969650269,
"learning_rate": 4.472596182505936e-06,
"loss": 0.5811,
"step": 8644
},
{
"epoch": 0.55,
"grad_norm": 0.9174748659133911,
"learning_rate": 4.47157592360115e-06,
"loss": 0.5954,
"step": 8645
},
{
"epoch": 0.55,
"grad_norm": 0.881083607673645,
"learning_rate": 4.470555686946464e-06,
"loss": 0.5622,
"step": 8646
},
{
"epoch": 0.55,
"grad_norm": 0.9171141982078552,
"learning_rate": 4.469535472584837e-06,
"loss": 0.5954,
"step": 8647
},
{
"epoch": 0.55,
"grad_norm": 0.9388046264648438,
"learning_rate": 4.468515280559227e-06,
"loss": 0.598,
"step": 8648
},
{
"epoch": 0.55,
"grad_norm": 0.8907988667488098,
"learning_rate": 4.467495110912587e-06,
"loss": 0.6237,
"step": 8649
},
{
"epoch": 0.55,
"grad_norm": 0.8721764087677002,
"learning_rate": 4.466474963687876e-06,
"loss": 0.6328,
"step": 8650
},
{
"epoch": 0.55,
"grad_norm": 0.8680887222290039,
"learning_rate": 4.46545483892805e-06,
"loss": 0.6172,
"step": 8651
},
{
"epoch": 0.55,
"grad_norm": 1.012911081314087,
"learning_rate": 4.464434736676061e-06,
"loss": 0.5685,
"step": 8652
},
{
"epoch": 0.55,
"grad_norm": 0.9347690343856812,
"learning_rate": 4.46341465697486e-06,
"loss": 0.6096,
"step": 8653
},
{
"epoch": 0.55,
"grad_norm": 0.7885945439338684,
"learning_rate": 4.462394599867402e-06,
"loss": 0.5374,
"step": 8654
},
{
"epoch": 0.55,
"grad_norm": 0.8694291114807129,
"learning_rate": 4.461374565396638e-06,
"loss": 0.561,
"step": 8655
},
{
"epoch": 0.55,
"grad_norm": 0.8917961716651917,
"learning_rate": 4.460354553605518e-06,
"loss": 0.6115,
"step": 8656
},
{
"epoch": 0.55,
"grad_norm": 0.8918887376785278,
"learning_rate": 4.459334564536988e-06,
"loss": 0.6392,
"step": 8657
},
{
"epoch": 0.55,
"grad_norm": 0.8710853457450867,
"learning_rate": 4.458314598234e-06,
"loss": 0.579,
"step": 8658
},
{
"epoch": 0.55,
"grad_norm": 0.8684004545211792,
"learning_rate": 4.4572946547395e-06,
"loss": 0.5717,
"step": 8659
},
{
"epoch": 0.55,
"grad_norm": 0.8856059908866882,
"learning_rate": 4.456274734096436e-06,
"loss": 0.5882,
"step": 8660
},
{
"epoch": 0.55,
"grad_norm": 0.9372711181640625,
"learning_rate": 4.455254836347749e-06,
"loss": 0.6284,
"step": 8661
},
{
"epoch": 0.55,
"grad_norm": 0.8738864064216614,
"learning_rate": 4.454234961536384e-06,
"loss": 0.5937,
"step": 8662
},
{
"epoch": 0.55,
"grad_norm": 0.8427755832672119,
"learning_rate": 4.45321510970529e-06,
"loss": 0.5822,
"step": 8663
},
{
"epoch": 0.55,
"grad_norm": 0.8868844509124756,
"learning_rate": 4.452195280897405e-06,
"loss": 0.5819,
"step": 8664
},
{
"epoch": 0.55,
"grad_norm": 0.8627145290374756,
"learning_rate": 4.451175475155669e-06,
"loss": 0.5566,
"step": 8665
},
{
"epoch": 0.55,
"grad_norm": 0.9276217222213745,
"learning_rate": 4.450155692523025e-06,
"loss": 0.5756,
"step": 8666
},
{
"epoch": 0.55,
"grad_norm": 0.8811351656913757,
"learning_rate": 4.449135933042414e-06,
"loss": 0.5945,
"step": 8667
},
{
"epoch": 0.55,
"grad_norm": 0.9855297803878784,
"learning_rate": 4.448116196756771e-06,
"loss": 0.6194,
"step": 8668
},
{
"epoch": 0.55,
"grad_norm": 0.813127338886261,
"learning_rate": 4.447096483709035e-06,
"loss": 0.5267,
"step": 8669
},
{
"epoch": 0.55,
"grad_norm": 0.8596179485321045,
"learning_rate": 4.4460767939421425e-06,
"loss": 0.5878,
"step": 8670
},
{
"epoch": 0.55,
"grad_norm": 0.9059436917304993,
"learning_rate": 4.44505712749903e-06,
"loss": 0.5663,
"step": 8671
},
{
"epoch": 0.55,
"grad_norm": 0.9185728430747986,
"learning_rate": 4.444037484422632e-06,
"loss": 0.6195,
"step": 8672
},
{
"epoch": 0.55,
"grad_norm": 0.8514521718025208,
"learning_rate": 4.44301786475588e-06,
"loss": 0.5572,
"step": 8673
},
{
"epoch": 0.55,
"grad_norm": 0.8482089042663574,
"learning_rate": 4.441998268541708e-06,
"loss": 0.5652,
"step": 8674
},
{
"epoch": 0.55,
"grad_norm": 0.8797301054000854,
"learning_rate": 4.440978695823049e-06,
"loss": 0.5766,
"step": 8675
},
{
"epoch": 0.55,
"grad_norm": 0.8916350603103638,
"learning_rate": 4.439959146642833e-06,
"loss": 0.5874,
"step": 8676
},
{
"epoch": 0.55,
"grad_norm": 0.9491859078407288,
"learning_rate": 4.4389396210439886e-06,
"loss": 0.5796,
"step": 8677
},
{
"epoch": 0.55,
"grad_norm": 0.8709646463394165,
"learning_rate": 4.437920119069445e-06,
"loss": 0.6025,
"step": 8678
},
{
"epoch": 0.55,
"grad_norm": 0.9544169306755066,
"learning_rate": 4.436900640762128e-06,
"loss": 0.6039,
"step": 8679
},
{
"epoch": 0.55,
"grad_norm": 0.9094875454902649,
"learning_rate": 4.435881186164968e-06,
"loss": 0.595,
"step": 8680
},
{
"epoch": 0.55,
"grad_norm": 0.9330776333808899,
"learning_rate": 4.434861755320888e-06,
"loss": 0.5634,
"step": 8681
},
{
"epoch": 0.55,
"grad_norm": 0.8398060202598572,
"learning_rate": 4.433842348272815e-06,
"loss": 0.573,
"step": 8682
},
{
"epoch": 0.55,
"grad_norm": 0.8635173439979553,
"learning_rate": 4.4328229650636676e-06,
"loss": 0.6207,
"step": 8683
},
{
"epoch": 0.55,
"grad_norm": 0.8932275176048279,
"learning_rate": 4.431803605736376e-06,
"loss": 0.5626,
"step": 8684
},
{
"epoch": 0.55,
"grad_norm": 0.8580974340438843,
"learning_rate": 4.430784270333855e-06,
"loss": 0.6231,
"step": 8685
},
{
"epoch": 0.55,
"grad_norm": 0.8090934753417969,
"learning_rate": 4.429764958899031e-06,
"loss": 0.524,
"step": 8686
},
{
"epoch": 0.55,
"grad_norm": 0.8802515864372253,
"learning_rate": 4.428745671474818e-06,
"loss": 0.5884,
"step": 8687
},
{
"epoch": 0.55,
"grad_norm": 0.9674070477485657,
"learning_rate": 4.427726408104139e-06,
"loss": 0.6131,
"step": 8688
},
{
"epoch": 0.55,
"grad_norm": 0.8582731485366821,
"learning_rate": 4.42670716882991e-06,
"loss": 0.6055,
"step": 8689
},
{
"epoch": 0.55,
"grad_norm": 0.87099689245224,
"learning_rate": 4.4256879536950495e-06,
"loss": 0.5777,
"step": 8690
},
{
"epoch": 0.55,
"grad_norm": 0.8034923076629639,
"learning_rate": 4.4246687627424686e-06,
"loss": 0.5599,
"step": 8691
},
{
"epoch": 0.55,
"grad_norm": 0.9542539119720459,
"learning_rate": 4.423649596015086e-06,
"loss": 0.6035,
"step": 8692
},
{
"epoch": 0.55,
"grad_norm": 0.8825756907463074,
"learning_rate": 4.422630453555814e-06,
"loss": 0.5617,
"step": 8693
},
{
"epoch": 0.55,
"grad_norm": 0.9769675135612488,
"learning_rate": 4.4216113354075654e-06,
"loss": 0.5966,
"step": 8694
},
{
"epoch": 0.55,
"grad_norm": 0.8933631777763367,
"learning_rate": 4.420592241613251e-06,
"loss": 0.6053,
"step": 8695
},
{
"epoch": 0.55,
"grad_norm": 0.9478098154067993,
"learning_rate": 4.4195731722157805e-06,
"loss": 0.5765,
"step": 8696
},
{
"epoch": 0.55,
"grad_norm": 0.8842805027961731,
"learning_rate": 4.418554127258066e-06,
"loss": 0.6167,
"step": 8697
},
{
"epoch": 0.55,
"grad_norm": 0.9180606007575989,
"learning_rate": 4.417535106783015e-06,
"loss": 0.6466,
"step": 8698
},
{
"epoch": 0.55,
"grad_norm": 0.8405973315238953,
"learning_rate": 4.416516110833533e-06,
"loss": 0.5296,
"step": 8699
},
{
"epoch": 0.55,
"grad_norm": 0.8814043998718262,
"learning_rate": 4.415497139452528e-06,
"loss": 0.6272,
"step": 8700
},
{
"epoch": 0.55,
"grad_norm": 0.9587448835372925,
"learning_rate": 4.414478192682905e-06,
"loss": 0.6149,
"step": 8701
},
{
"epoch": 0.55,
"grad_norm": 0.8646758198738098,
"learning_rate": 4.41345927056757e-06,
"loss": 0.53,
"step": 8702
},
{
"epoch": 0.55,
"grad_norm": 0.917334794998169,
"learning_rate": 4.4124403731494235e-06,
"loss": 0.6441,
"step": 8703
},
{
"epoch": 0.55,
"grad_norm": 0.8852252960205078,
"learning_rate": 4.4114215004713665e-06,
"loss": 0.5874,
"step": 8704
},
{
"epoch": 0.55,
"grad_norm": 0.8766316771507263,
"learning_rate": 4.410402652576307e-06,
"loss": 0.5342,
"step": 8705
},
{
"epoch": 0.55,
"grad_norm": 0.904898464679718,
"learning_rate": 4.409383829507139e-06,
"loss": 0.5368,
"step": 8706
},
{
"epoch": 0.55,
"grad_norm": 0.877981960773468,
"learning_rate": 4.408365031306763e-06,
"loss": 0.5601,
"step": 8707
},
{
"epoch": 0.55,
"grad_norm": 0.9853324890136719,
"learning_rate": 4.407346258018078e-06,
"loss": 0.6007,
"step": 8708
},
{
"epoch": 0.55,
"grad_norm": 0.8749459981918335,
"learning_rate": 4.4063275096839785e-06,
"loss": 0.5521,
"step": 8709
},
{
"epoch": 0.55,
"grad_norm": 0.9380457997322083,
"learning_rate": 4.405308786347365e-06,
"loss": 0.5872,
"step": 8710
},
{
"epoch": 0.55,
"grad_norm": 0.9295644164085388,
"learning_rate": 4.404290088051128e-06,
"loss": 0.5885,
"step": 8711
},
{
"epoch": 0.55,
"grad_norm": 0.8839128613471985,
"learning_rate": 4.403271414838164e-06,
"loss": 0.5699,
"step": 8712
},
{
"epoch": 0.55,
"grad_norm": 0.9140700101852417,
"learning_rate": 4.402252766751363e-06,
"loss": 0.5752,
"step": 8713
},
{
"epoch": 0.55,
"grad_norm": 0.8850396871566772,
"learning_rate": 4.401234143833621e-06,
"loss": 0.5811,
"step": 8714
},
{
"epoch": 0.55,
"grad_norm": 0.8859695196151733,
"learning_rate": 4.400215546127825e-06,
"loss": 0.5739,
"step": 8715
},
{
"epoch": 0.55,
"grad_norm": 0.8375210762023926,
"learning_rate": 4.399196973676867e-06,
"loss": 0.5541,
"step": 8716
},
{
"epoch": 0.55,
"grad_norm": 0.9590082168579102,
"learning_rate": 4.398178426523632e-06,
"loss": 0.6474,
"step": 8717
},
{
"epoch": 0.55,
"grad_norm": 0.9406694769859314,
"learning_rate": 4.3971599047110116e-06,
"loss": 0.5898,
"step": 8718
},
{
"epoch": 0.55,
"grad_norm": 0.8761373162269592,
"learning_rate": 4.3961414082818904e-06,
"loss": 0.5739,
"step": 8719
},
{
"epoch": 0.55,
"grad_norm": 0.9185157418251038,
"learning_rate": 4.395122937279154e-06,
"loss": 0.6258,
"step": 8720
},
{
"epoch": 0.55,
"grad_norm": 0.8924233317375183,
"learning_rate": 4.394104491745686e-06,
"loss": 0.5819,
"step": 8721
},
{
"epoch": 0.55,
"grad_norm": 0.7826727032661438,
"learning_rate": 4.393086071724371e-06,
"loss": 0.5175,
"step": 8722
},
{
"epoch": 0.55,
"grad_norm": 0.9112175107002258,
"learning_rate": 4.392067677258089e-06,
"loss": 0.644,
"step": 8723
},
{
"epoch": 0.55,
"grad_norm": 0.8790960907936096,
"learning_rate": 4.391049308389723e-06,
"loss": 0.5537,
"step": 8724
},
{
"epoch": 0.55,
"grad_norm": 0.8678017854690552,
"learning_rate": 4.390030965162153e-06,
"loss": 0.5639,
"step": 8725
},
{
"epoch": 0.55,
"grad_norm": 0.8150790929794312,
"learning_rate": 4.389012647618255e-06,
"loss": 0.5596,
"step": 8726
},
{
"epoch": 0.55,
"grad_norm": 0.9205260872840881,
"learning_rate": 4.387994355800909e-06,
"loss": 0.6169,
"step": 8727
},
{
"epoch": 0.55,
"grad_norm": 0.8948895931243896,
"learning_rate": 4.386976089752994e-06,
"loss": 0.5627,
"step": 8728
},
{
"epoch": 0.55,
"grad_norm": 0.9792115688323975,
"learning_rate": 4.385957849517383e-06,
"loss": 0.6722,
"step": 8729
},
{
"epoch": 0.55,
"grad_norm": 0.8200992941856384,
"learning_rate": 4.384939635136948e-06,
"loss": 0.5813,
"step": 8730
},
{
"epoch": 0.55,
"grad_norm": 0.8706687092781067,
"learning_rate": 4.383921446654567e-06,
"loss": 0.5613,
"step": 8731
},
{
"epoch": 0.55,
"grad_norm": 0.9445881247520447,
"learning_rate": 4.3829032841131116e-06,
"loss": 0.5922,
"step": 8732
},
{
"epoch": 0.55,
"grad_norm": 0.9358285665512085,
"learning_rate": 4.381885147555453e-06,
"loss": 0.6582,
"step": 8733
},
{
"epoch": 0.55,
"grad_norm": 0.9091733694076538,
"learning_rate": 4.380867037024457e-06,
"loss": 0.5389,
"step": 8734
},
{
"epoch": 0.55,
"grad_norm": 0.8815348744392395,
"learning_rate": 4.379848952562999e-06,
"loss": 0.6216,
"step": 8735
},
{
"epoch": 0.55,
"grad_norm": 0.859954833984375,
"learning_rate": 4.3788308942139435e-06,
"loss": 0.568,
"step": 8736
},
{
"epoch": 0.55,
"grad_norm": 0.838782548904419,
"learning_rate": 4.3778128620201595e-06,
"loss": 0.546,
"step": 8737
},
{
"epoch": 0.55,
"grad_norm": 0.8781520128250122,
"learning_rate": 4.376794856024509e-06,
"loss": 0.6458,
"step": 8738
},
{
"epoch": 0.55,
"grad_norm": 0.908060610294342,
"learning_rate": 4.37577687626986e-06,
"loss": 0.5873,
"step": 8739
},
{
"epoch": 0.55,
"grad_norm": 0.8939605355262756,
"learning_rate": 4.374758922799076e-06,
"loss": 0.5933,
"step": 8740
},
{
"epoch": 0.55,
"grad_norm": 0.9307653307914734,
"learning_rate": 4.373740995655019e-06,
"loss": 0.6065,
"step": 8741
},
{
"epoch": 0.55,
"grad_norm": 0.9014899134635925,
"learning_rate": 4.372723094880549e-06,
"loss": 0.667,
"step": 8742
},
{
"epoch": 0.55,
"grad_norm": 0.9142792820930481,
"learning_rate": 4.371705220518526e-06,
"loss": 0.5868,
"step": 8743
},
{
"epoch": 0.55,
"grad_norm": 0.8495569229125977,
"learning_rate": 4.3706873726118135e-06,
"loss": 0.5737,
"step": 8744
},
{
"epoch": 0.55,
"grad_norm": 0.8663256168365479,
"learning_rate": 4.369669551203266e-06,
"loss": 0.5857,
"step": 8745
},
{
"epoch": 0.55,
"grad_norm": 0.8795154690742493,
"learning_rate": 4.368651756335739e-06,
"loss": 0.601,
"step": 8746
},
{
"epoch": 0.55,
"grad_norm": 0.9166411757469177,
"learning_rate": 4.36763398805209e-06,
"loss": 0.5774,
"step": 8747
},
{
"epoch": 0.55,
"grad_norm": 0.9235051870346069,
"learning_rate": 4.366616246395177e-06,
"loss": 0.5795,
"step": 8748
},
{
"epoch": 0.55,
"grad_norm": 0.9201914072036743,
"learning_rate": 4.365598531407849e-06,
"loss": 0.6128,
"step": 8749
},
{
"epoch": 0.55,
"grad_norm": 0.9426406025886536,
"learning_rate": 4.364580843132959e-06,
"loss": 0.5561,
"step": 8750
},
{
"epoch": 0.55,
"grad_norm": 0.8633618354797363,
"learning_rate": 4.363563181613359e-06,
"loss": 0.5643,
"step": 8751
},
{
"epoch": 0.55,
"grad_norm": 0.8648019433021545,
"learning_rate": 4.362545546891901e-06,
"loss": 0.5829,
"step": 8752
},
{
"epoch": 0.55,
"grad_norm": 0.9268175363540649,
"learning_rate": 4.361527939011433e-06,
"loss": 0.5497,
"step": 8753
},
{
"epoch": 0.55,
"grad_norm": 0.9331282377243042,
"learning_rate": 4.360510358014801e-06,
"loss": 0.6365,
"step": 8754
},
{
"epoch": 0.55,
"grad_norm": 0.9760857820510864,
"learning_rate": 4.359492803944854e-06,
"loss": 0.617,
"step": 8755
},
{
"epoch": 0.55,
"grad_norm": 0.8864888548851013,
"learning_rate": 4.358475276844435e-06,
"loss": 0.5794,
"step": 8756
},
{
"epoch": 0.55,
"grad_norm": 0.8550977110862732,
"learning_rate": 4.357457776756392e-06,
"loss": 0.6164,
"step": 8757
},
{
"epoch": 0.55,
"grad_norm": 0.8886324167251587,
"learning_rate": 4.3564403037235666e-06,
"loss": 0.5582,
"step": 8758
},
{
"epoch": 0.55,
"grad_norm": 0.8365561962127686,
"learning_rate": 4.355422857788802e-06,
"loss": 0.5514,
"step": 8759
},
{
"epoch": 0.55,
"grad_norm": 0.8547555208206177,
"learning_rate": 4.3544054389949366e-06,
"loss": 0.579,
"step": 8760
},
{
"epoch": 0.56,
"grad_norm": 0.9086821675300598,
"learning_rate": 4.353388047384813e-06,
"loss": 0.5918,
"step": 8761
},
{
"epoch": 0.56,
"grad_norm": 0.8336657881736755,
"learning_rate": 4.35237068300127e-06,
"loss": 0.5914,
"step": 8762
},
{
"epoch": 0.56,
"grad_norm": 1.0008983612060547,
"learning_rate": 4.351353345887145e-06,
"loss": 0.6075,
"step": 8763
},
{
"epoch": 0.56,
"grad_norm": 0.9368928074836731,
"learning_rate": 4.350336036085272e-06,
"loss": 0.6175,
"step": 8764
},
{
"epoch": 0.56,
"grad_norm": 0.9230781197547913,
"learning_rate": 4.349318753638491e-06,
"loss": 0.6663,
"step": 8765
},
{
"epoch": 0.56,
"grad_norm": 0.8669142127037048,
"learning_rate": 4.348301498589632e-06,
"loss": 0.5702,
"step": 8766
},
{
"epoch": 0.56,
"grad_norm": 0.908332347869873,
"learning_rate": 4.347284270981531e-06,
"loss": 0.6157,
"step": 8767
},
{
"epoch": 0.56,
"grad_norm": 0.8867782950401306,
"learning_rate": 4.346267070857017e-06,
"loss": 0.5932,
"step": 8768
},
{
"epoch": 0.56,
"grad_norm": 0.8559575080871582,
"learning_rate": 4.3452498982589234e-06,
"loss": 0.5792,
"step": 8769
},
{
"epoch": 0.56,
"grad_norm": 0.8476456999778748,
"learning_rate": 4.34423275323008e-06,
"loss": 0.5813,
"step": 8770
},
{
"epoch": 0.56,
"grad_norm": 0.8500044941902161,
"learning_rate": 4.343215635813314e-06,
"loss": 0.5623,
"step": 8771
},
{
"epoch": 0.56,
"grad_norm": 0.8590050935745239,
"learning_rate": 4.3421985460514515e-06,
"loss": 0.5822,
"step": 8772
},
{
"epoch": 0.56,
"grad_norm": 0.882090151309967,
"learning_rate": 4.341181483987319e-06,
"loss": 0.5638,
"step": 8773
},
{
"epoch": 0.56,
"grad_norm": 0.8285457491874695,
"learning_rate": 4.340164449663745e-06,
"loss": 0.5572,
"step": 8774
},
{
"epoch": 0.56,
"grad_norm": 0.8915181159973145,
"learning_rate": 4.33914744312355e-06,
"loss": 0.5627,
"step": 8775
},
{
"epoch": 0.56,
"grad_norm": 0.9251353740692139,
"learning_rate": 4.338130464409556e-06,
"loss": 0.5431,
"step": 8776
},
{
"epoch": 0.56,
"grad_norm": 0.9160726070404053,
"learning_rate": 4.3371135135645845e-06,
"loss": 0.6369,
"step": 8777
},
{
"epoch": 0.56,
"grad_norm": 0.9499028325080872,
"learning_rate": 4.33609659063146e-06,
"loss": 0.5955,
"step": 8778
},
{
"epoch": 0.56,
"grad_norm": 0.8448708653450012,
"learning_rate": 4.335079695652998e-06,
"loss": 0.6101,
"step": 8779
},
{
"epoch": 0.56,
"grad_norm": 1.0195928812026978,
"learning_rate": 4.334062828672016e-06,
"loss": 0.6204,
"step": 8780
},
{
"epoch": 0.56,
"grad_norm": 0.9017850756645203,
"learning_rate": 4.3330459897313305e-06,
"loss": 0.5725,
"step": 8781
},
{
"epoch": 0.56,
"grad_norm": 0.8847092390060425,
"learning_rate": 4.33202917887376e-06,
"loss": 0.5784,
"step": 8782
},
{
"epoch": 0.56,
"grad_norm": 0.864553689956665,
"learning_rate": 4.331012396142117e-06,
"loss": 0.5691,
"step": 8783
},
{
"epoch": 0.56,
"grad_norm": 0.8894702792167664,
"learning_rate": 4.3299956415792145e-06,
"loss": 0.6365,
"step": 8784
},
{
"epoch": 0.56,
"grad_norm": 0.8423247337341309,
"learning_rate": 4.328978915227866e-06,
"loss": 0.5971,
"step": 8785
},
{
"epoch": 0.56,
"grad_norm": 0.9544634819030762,
"learning_rate": 4.327962217130878e-06,
"loss": 0.6287,
"step": 8786
},
{
"epoch": 0.56,
"grad_norm": 0.9328646659851074,
"learning_rate": 4.326945547331065e-06,
"loss": 0.5529,
"step": 8787
},
{
"epoch": 0.56,
"grad_norm": 0.921759843826294,
"learning_rate": 4.325928905871233e-06,
"loss": 0.6143,
"step": 8788
},
{
"epoch": 0.56,
"grad_norm": 0.8561935424804688,
"learning_rate": 4.324912292794192e-06,
"loss": 0.6107,
"step": 8789
},
{
"epoch": 0.56,
"grad_norm": 0.8923735618591309,
"learning_rate": 4.323895708142742e-06,
"loss": 0.5331,
"step": 8790
},
{
"epoch": 0.56,
"grad_norm": 0.8794368505477905,
"learning_rate": 4.322879151959695e-06,
"loss": 0.5809,
"step": 8791
},
{
"epoch": 0.56,
"grad_norm": 0.8946419358253479,
"learning_rate": 4.321862624287851e-06,
"loss": 0.5801,
"step": 8792
},
{
"epoch": 0.56,
"grad_norm": 0.9291636943817139,
"learning_rate": 4.320846125170012e-06,
"loss": 0.6148,
"step": 8793
},
{
"epoch": 0.56,
"grad_norm": 0.8626858592033386,
"learning_rate": 4.31982965464898e-06,
"loss": 0.5753,
"step": 8794
},
{
"epoch": 0.56,
"grad_norm": 0.9002351760864258,
"learning_rate": 4.318813212767555e-06,
"loss": 0.5691,
"step": 8795
},
{
"epoch": 0.56,
"grad_norm": 0.8788061141967773,
"learning_rate": 4.317796799568536e-06,
"loss": 0.6002,
"step": 8796
},
{
"epoch": 0.56,
"grad_norm": 0.8354102373123169,
"learning_rate": 4.316780415094722e-06,
"loss": 0.5693,
"step": 8797
},
{
"epoch": 0.56,
"grad_norm": 0.89030921459198,
"learning_rate": 4.315764059388905e-06,
"loss": 0.5916,
"step": 8798
},
{
"epoch": 0.56,
"grad_norm": 0.856412947177887,
"learning_rate": 4.314747732493886e-06,
"loss": 0.563,
"step": 8799
},
{
"epoch": 0.56,
"grad_norm": 0.832213282585144,
"learning_rate": 4.313731434452455e-06,
"loss": 0.5612,
"step": 8800
},
{
"epoch": 0.56,
"grad_norm": 0.8731396794319153,
"learning_rate": 4.312715165307407e-06,
"loss": 0.6631,
"step": 8801
},
{
"epoch": 0.56,
"grad_norm": 0.9026145935058594,
"learning_rate": 4.311698925101532e-06,
"loss": 0.5776,
"step": 8802
},
{
"epoch": 0.56,
"grad_norm": 0.8666503429412842,
"learning_rate": 4.310682713877619e-06,
"loss": 0.5579,
"step": 8803
},
{
"epoch": 0.56,
"grad_norm": 0.9560415744781494,
"learning_rate": 4.30966653167846e-06,
"loss": 0.6415,
"step": 8804
},
{
"epoch": 0.56,
"grad_norm": 0.8633235096931458,
"learning_rate": 4.308650378546843e-06,
"loss": 0.5844,
"step": 8805
},
{
"epoch": 0.56,
"grad_norm": 0.8731099367141724,
"learning_rate": 4.3076342545255535e-06,
"loss": 0.5678,
"step": 8806
},
{
"epoch": 0.56,
"grad_norm": 0.8647497296333313,
"learning_rate": 4.306618159657375e-06,
"loss": 0.5964,
"step": 8807
},
{
"epoch": 0.56,
"grad_norm": 0.867325484752655,
"learning_rate": 4.305602093985095e-06,
"loss": 0.597,
"step": 8808
},
{
"epoch": 0.56,
"grad_norm": 0.8929917812347412,
"learning_rate": 4.3045860575514955e-06,
"loss": 0.5933,
"step": 8809
},
{
"epoch": 0.56,
"grad_norm": 0.8812966346740723,
"learning_rate": 4.303570050399358e-06,
"loss": 0.6035,
"step": 8810
},
{
"epoch": 0.56,
"grad_norm": 0.8197950124740601,
"learning_rate": 4.302554072571461e-06,
"loss": 0.5531,
"step": 8811
},
{
"epoch": 0.56,
"grad_norm": 0.8902185559272766,
"learning_rate": 4.301538124110588e-06,
"loss": 0.6078,
"step": 8812
},
{
"epoch": 0.56,
"grad_norm": 0.8458168506622314,
"learning_rate": 4.300522205059515e-06,
"loss": 0.5865,
"step": 8813
},
{
"epoch": 0.56,
"grad_norm": 0.9090011119842529,
"learning_rate": 4.299506315461018e-06,
"loss": 0.5848,
"step": 8814
},
{
"epoch": 0.56,
"grad_norm": 0.8977993726730347,
"learning_rate": 4.2984904553578725e-06,
"loss": 0.607,
"step": 8815
},
{
"epoch": 0.56,
"grad_norm": 0.888264000415802,
"learning_rate": 4.297474624792853e-06,
"loss": 0.5694,
"step": 8816
},
{
"epoch": 0.56,
"grad_norm": 0.8837360739707947,
"learning_rate": 4.296458823808735e-06,
"loss": 0.5943,
"step": 8817
},
{
"epoch": 0.56,
"grad_norm": 0.9838821887969971,
"learning_rate": 4.295443052448288e-06,
"loss": 0.6391,
"step": 8818
},
{
"epoch": 0.56,
"grad_norm": 0.8406442999839783,
"learning_rate": 4.294427310754283e-06,
"loss": 0.5576,
"step": 8819
},
{
"epoch": 0.56,
"grad_norm": 0.9092972278594971,
"learning_rate": 4.293411598769487e-06,
"loss": 0.6397,
"step": 8820
},
{
"epoch": 0.56,
"grad_norm": 0.8684690594673157,
"learning_rate": 4.292395916536674e-06,
"loss": 0.5996,
"step": 8821
},
{
"epoch": 0.56,
"grad_norm": 0.938960075378418,
"learning_rate": 4.291380264098607e-06,
"loss": 0.6217,
"step": 8822
},
{
"epoch": 0.56,
"grad_norm": 0.9801902174949646,
"learning_rate": 4.290364641498051e-06,
"loss": 0.5536,
"step": 8823
},
{
"epoch": 0.56,
"grad_norm": 0.8995389938354492,
"learning_rate": 4.28934904877777e-06,
"loss": 0.5696,
"step": 8824
},
{
"epoch": 0.56,
"grad_norm": 0.9216705560684204,
"learning_rate": 4.288333485980531e-06,
"loss": 0.5857,
"step": 8825
},
{
"epoch": 0.56,
"grad_norm": 0.8806384205818176,
"learning_rate": 4.287317953149092e-06,
"loss": 0.592,
"step": 8826
},
{
"epoch": 0.56,
"grad_norm": 0.8204985857009888,
"learning_rate": 4.2863024503262146e-06,
"loss": 0.4938,
"step": 8827
},
{
"epoch": 0.56,
"grad_norm": 0.9380052089691162,
"learning_rate": 4.285286977554657e-06,
"loss": 0.6195,
"step": 8828
},
{
"epoch": 0.56,
"grad_norm": 0.9966148734092712,
"learning_rate": 4.284271534877181e-06,
"loss": 0.6133,
"step": 8829
},
{
"epoch": 0.56,
"grad_norm": 0.9887740015983582,
"learning_rate": 4.283256122336539e-06,
"loss": 0.6519,
"step": 8830
},
{
"epoch": 0.56,
"grad_norm": 0.8869448900222778,
"learning_rate": 4.28224073997549e-06,
"loss": 0.6057,
"step": 8831
},
{
"epoch": 0.56,
"grad_norm": 0.8631427884101868,
"learning_rate": 4.281225387836786e-06,
"loss": 0.552,
"step": 8832
},
{
"epoch": 0.56,
"grad_norm": 0.8739815354347229,
"learning_rate": 4.280210065963179e-06,
"loss": 0.5631,
"step": 8833
},
{
"epoch": 0.56,
"grad_norm": 0.8833276629447937,
"learning_rate": 4.279194774397422e-06,
"loss": 0.5637,
"step": 8834
},
{
"epoch": 0.56,
"grad_norm": 0.8888053894042969,
"learning_rate": 4.278179513182268e-06,
"loss": 0.5978,
"step": 8835
},
{
"epoch": 0.56,
"grad_norm": 0.9251044392585754,
"learning_rate": 4.2771642823604635e-06,
"loss": 0.5844,
"step": 8836
},
{
"epoch": 0.56,
"grad_norm": 0.9090611934661865,
"learning_rate": 4.276149081974754e-06,
"loss": 0.5963,
"step": 8837
},
{
"epoch": 0.56,
"grad_norm": 0.8514662384986877,
"learning_rate": 4.275133912067889e-06,
"loss": 0.5199,
"step": 8838
},
{
"epoch": 0.56,
"grad_norm": 0.9209812879562378,
"learning_rate": 4.274118772682615e-06,
"loss": 0.5519,
"step": 8839
},
{
"epoch": 0.56,
"grad_norm": 0.9110792279243469,
"learning_rate": 4.273103663861675e-06,
"loss": 0.582,
"step": 8840
},
{
"epoch": 0.56,
"grad_norm": 0.8700342774391174,
"learning_rate": 4.272088585647808e-06,
"loss": 0.5325,
"step": 8841
},
{
"epoch": 0.56,
"grad_norm": 0.8676977157592773,
"learning_rate": 4.27107353808376e-06,
"loss": 0.5971,
"step": 8842
},
{
"epoch": 0.56,
"grad_norm": 0.8846719861030579,
"learning_rate": 4.2700585212122705e-06,
"loss": 0.5739,
"step": 8843
},
{
"epoch": 0.56,
"grad_norm": 0.9149625301361084,
"learning_rate": 4.269043535076077e-06,
"loss": 0.5704,
"step": 8844
},
{
"epoch": 0.56,
"grad_norm": 0.8597497344017029,
"learning_rate": 4.2680285797179155e-06,
"loss": 0.5798,
"step": 8845
},
{
"epoch": 0.56,
"grad_norm": 0.8914947509765625,
"learning_rate": 4.267013655180526e-06,
"loss": 0.6291,
"step": 8846
},
{
"epoch": 0.56,
"grad_norm": 0.8733804821968079,
"learning_rate": 4.265998761506641e-06,
"loss": 0.62,
"step": 8847
},
{
"epoch": 0.56,
"grad_norm": 0.8868311047554016,
"learning_rate": 4.264983898738996e-06,
"loss": 0.5277,
"step": 8848
},
{
"epoch": 0.56,
"grad_norm": 0.9130145907402039,
"learning_rate": 4.263969066920321e-06,
"loss": 0.5653,
"step": 8849
},
{
"epoch": 0.56,
"grad_norm": 0.9107689261436462,
"learning_rate": 4.262954266093347e-06,
"loss": 0.603,
"step": 8850
},
{
"epoch": 0.56,
"grad_norm": 0.8246250748634338,
"learning_rate": 4.261939496300807e-06,
"loss": 0.545,
"step": 8851
},
{
"epoch": 0.56,
"grad_norm": 0.8571567535400391,
"learning_rate": 4.260924757585427e-06,
"loss": 0.5724,
"step": 8852
},
{
"epoch": 0.56,
"grad_norm": 0.8799802660942078,
"learning_rate": 4.259910049989933e-06,
"loss": 0.5336,
"step": 8853
},
{
"epoch": 0.56,
"grad_norm": 0.8496525287628174,
"learning_rate": 4.258895373557051e-06,
"loss": 0.5659,
"step": 8854
},
{
"epoch": 0.56,
"grad_norm": 0.8902441263198853,
"learning_rate": 4.25788072832951e-06,
"loss": 0.6043,
"step": 8855
},
{
"epoch": 0.56,
"grad_norm": 0.8644466400146484,
"learning_rate": 4.256866114350029e-06,
"loss": 0.5516,
"step": 8856
},
{
"epoch": 0.56,
"grad_norm": 0.9049948453903198,
"learning_rate": 4.25585153166133e-06,
"loss": 0.669,
"step": 8857
},
{
"epoch": 0.56,
"grad_norm": 0.9145426750183105,
"learning_rate": 4.254836980306134e-06,
"loss": 0.5955,
"step": 8858
},
{
"epoch": 0.56,
"grad_norm": 0.8813319206237793,
"learning_rate": 4.253822460327162e-06,
"loss": 0.5738,
"step": 8859
},
{
"epoch": 0.56,
"grad_norm": 0.994666576385498,
"learning_rate": 4.25280797176713e-06,
"loss": 0.5967,
"step": 8860
},
{
"epoch": 0.56,
"grad_norm": 0.9201557636260986,
"learning_rate": 4.251793514668754e-06,
"loss": 0.5901,
"step": 8861
},
{
"epoch": 0.56,
"grad_norm": 0.8030241131782532,
"learning_rate": 4.250779089074752e-06,
"loss": 0.5547,
"step": 8862
},
{
"epoch": 0.56,
"grad_norm": 0.8798213601112366,
"learning_rate": 4.249764695027833e-06,
"loss": 0.6044,
"step": 8863
},
{
"epoch": 0.56,
"grad_norm": 0.8339963555335999,
"learning_rate": 4.248750332570716e-06,
"loss": 0.6645,
"step": 8864
},
{
"epoch": 0.56,
"grad_norm": 0.9063261151313782,
"learning_rate": 4.247736001746108e-06,
"loss": 0.5697,
"step": 8865
},
{
"epoch": 0.56,
"grad_norm": 0.9362940192222595,
"learning_rate": 4.246721702596721e-06,
"loss": 0.5943,
"step": 8866
},
{
"epoch": 0.56,
"grad_norm": 0.904593288898468,
"learning_rate": 4.24570743516526e-06,
"loss": 0.626,
"step": 8867
},
{
"epoch": 0.56,
"grad_norm": 0.9311546683311462,
"learning_rate": 4.2446931994944375e-06,
"loss": 0.5865,
"step": 8868
},
{
"epoch": 0.56,
"grad_norm": 0.9218093156814575,
"learning_rate": 4.243678995626955e-06,
"loss": 0.5746,
"step": 8869
},
{
"epoch": 0.56,
"grad_norm": 0.8506073355674744,
"learning_rate": 4.242664823605521e-06,
"loss": 0.5585,
"step": 8870
},
{
"epoch": 0.56,
"grad_norm": 0.9345227479934692,
"learning_rate": 4.241650683472834e-06,
"loss": 0.5323,
"step": 8871
},
{
"epoch": 0.56,
"grad_norm": 0.8558427095413208,
"learning_rate": 4.240636575271601e-06,
"loss": 0.4936,
"step": 8872
},
{
"epoch": 0.56,
"grad_norm": 0.8252081871032715,
"learning_rate": 4.239622499044519e-06,
"loss": 0.55,
"step": 8873
},
{
"epoch": 0.56,
"grad_norm": 0.8779731392860413,
"learning_rate": 4.23860845483429e-06,
"loss": 0.5766,
"step": 8874
},
{
"epoch": 0.56,
"grad_norm": 0.9093831181526184,
"learning_rate": 4.237594442683607e-06,
"loss": 0.6104,
"step": 8875
},
{
"epoch": 0.56,
"grad_norm": 0.8918717503547668,
"learning_rate": 4.236580462635173e-06,
"loss": 0.5824,
"step": 8876
},
{
"epoch": 0.56,
"grad_norm": 0.8895564675331116,
"learning_rate": 4.235566514731678e-06,
"loss": 0.6093,
"step": 8877
},
{
"epoch": 0.56,
"grad_norm": 0.9440225958824158,
"learning_rate": 4.23455259901582e-06,
"loss": 0.617,
"step": 8878
},
{
"epoch": 0.56,
"grad_norm": 0.8962016105651855,
"learning_rate": 4.2335387155302885e-06,
"loss": 0.5789,
"step": 8879
},
{
"epoch": 0.56,
"grad_norm": 0.8680998682975769,
"learning_rate": 4.232524864317773e-06,
"loss": 0.549,
"step": 8880
},
{
"epoch": 0.56,
"grad_norm": 0.8944227695465088,
"learning_rate": 4.231511045420967e-06,
"loss": 0.6008,
"step": 8881
},
{
"epoch": 0.56,
"grad_norm": 0.8181406259536743,
"learning_rate": 4.230497258882559e-06,
"loss": 0.5423,
"step": 8882
},
{
"epoch": 0.56,
"grad_norm": 0.9459832906723022,
"learning_rate": 4.229483504745233e-06,
"loss": 0.6108,
"step": 8883
},
{
"epoch": 0.56,
"grad_norm": 0.8519952297210693,
"learning_rate": 4.228469783051676e-06,
"loss": 0.5936,
"step": 8884
},
{
"epoch": 0.56,
"grad_norm": 0.8907895088195801,
"learning_rate": 4.227456093844573e-06,
"loss": 0.5805,
"step": 8885
},
{
"epoch": 0.56,
"grad_norm": 1.0412497520446777,
"learning_rate": 4.226442437166607e-06,
"loss": 0.5508,
"step": 8886
},
{
"epoch": 0.56,
"grad_norm": 0.8901419639587402,
"learning_rate": 4.225428813060459e-06,
"loss": 0.5822,
"step": 8887
},
{
"epoch": 0.56,
"grad_norm": 0.8553881049156189,
"learning_rate": 4.224415221568807e-06,
"loss": 0.5854,
"step": 8888
},
{
"epoch": 0.56,
"grad_norm": 0.8875113129615784,
"learning_rate": 4.223401662734333e-06,
"loss": 0.5348,
"step": 8889
},
{
"epoch": 0.56,
"grad_norm": 0.8207681179046631,
"learning_rate": 4.222388136599715e-06,
"loss": 0.5878,
"step": 8890
},
{
"epoch": 0.56,
"grad_norm": 0.8918472528457642,
"learning_rate": 4.221374643207626e-06,
"loss": 0.5744,
"step": 8891
},
{
"epoch": 0.56,
"grad_norm": 0.8446689248085022,
"learning_rate": 4.220361182600742e-06,
"loss": 0.6045,
"step": 8892
},
{
"epoch": 0.56,
"grad_norm": 0.883139967918396,
"learning_rate": 4.219347754821737e-06,
"loss": 0.5616,
"step": 8893
},
{
"epoch": 0.56,
"grad_norm": 0.9404736161231995,
"learning_rate": 4.218334359913283e-06,
"loss": 0.565,
"step": 8894
},
{
"epoch": 0.56,
"grad_norm": 0.8452960848808289,
"learning_rate": 4.217320997918048e-06,
"loss": 0.5913,
"step": 8895
},
{
"epoch": 0.56,
"grad_norm": 0.9132777452468872,
"learning_rate": 4.216307668878706e-06,
"loss": 0.6401,
"step": 8896
},
{
"epoch": 0.56,
"grad_norm": 0.8934757113456726,
"learning_rate": 4.2152943728379185e-06,
"loss": 0.618,
"step": 8897
},
{
"epoch": 0.56,
"grad_norm": 0.9089536070823669,
"learning_rate": 4.214281109838357e-06,
"loss": 0.545,
"step": 8898
},
{
"epoch": 0.56,
"grad_norm": 0.8987053036689758,
"learning_rate": 4.213267879922685e-06,
"loss": 0.6345,
"step": 8899
},
{
"epoch": 0.56,
"grad_norm": 0.8274092078208923,
"learning_rate": 4.212254683133565e-06,
"loss": 0.5152,
"step": 8900
},
{
"epoch": 0.56,
"grad_norm": 0.8774511814117432,
"learning_rate": 4.2112415195136585e-06,
"loss": 0.5542,
"step": 8901
},
{
"epoch": 0.56,
"grad_norm": 0.9276379942893982,
"learning_rate": 4.21022838910563e-06,
"loss": 0.5648,
"step": 8902
},
{
"epoch": 0.56,
"grad_norm": 0.8499544858932495,
"learning_rate": 4.209215291952135e-06,
"loss": 0.5782,
"step": 8903
},
{
"epoch": 0.56,
"grad_norm": 0.8849813938140869,
"learning_rate": 4.208202228095835e-06,
"loss": 0.5964,
"step": 8904
},
{
"epoch": 0.56,
"grad_norm": 0.9096781611442566,
"learning_rate": 4.207189197579382e-06,
"loss": 0.5476,
"step": 8905
},
{
"epoch": 0.56,
"grad_norm": 0.7918185591697693,
"learning_rate": 4.2061762004454365e-06,
"loss": 0.5343,
"step": 8906
},
{
"epoch": 0.56,
"grad_norm": 0.9129202365875244,
"learning_rate": 4.2051632367366485e-06,
"loss": 0.6456,
"step": 8907
},
{
"epoch": 0.56,
"grad_norm": 0.8676325082778931,
"learning_rate": 4.204150306495672e-06,
"loss": 0.6489,
"step": 8908
},
{
"epoch": 0.56,
"grad_norm": 0.8340794444084167,
"learning_rate": 4.203137409765159e-06,
"loss": 0.5279,
"step": 8909
},
{
"epoch": 0.56,
"grad_norm": 0.8473523855209351,
"learning_rate": 4.202124546587754e-06,
"loss": 0.5894,
"step": 8910
},
{
"epoch": 0.56,
"grad_norm": 0.8485411405563354,
"learning_rate": 4.201111717006111e-06,
"loss": 0.6015,
"step": 8911
},
{
"epoch": 0.56,
"grad_norm": 1.0016659498214722,
"learning_rate": 4.200098921062875e-06,
"loss": 0.5994,
"step": 8912
},
{
"epoch": 0.56,
"grad_norm": 0.8975883722305298,
"learning_rate": 4.19908615880069e-06,
"loss": 0.6251,
"step": 8913
},
{
"epoch": 0.56,
"grad_norm": 0.9073837399482727,
"learning_rate": 4.198073430262199e-06,
"loss": 0.617,
"step": 8914
},
{
"epoch": 0.56,
"grad_norm": 0.8681656122207642,
"learning_rate": 4.197060735490048e-06,
"loss": 0.5584,
"step": 8915
},
{
"epoch": 0.56,
"grad_norm": 0.8720282912254333,
"learning_rate": 4.196048074526876e-06,
"loss": 0.6311,
"step": 8916
},
{
"epoch": 0.56,
"grad_norm": 0.9659051299095154,
"learning_rate": 4.195035447415324e-06,
"loss": 0.625,
"step": 8917
},
{
"epoch": 0.57,
"grad_norm": 0.8648727536201477,
"learning_rate": 4.194022854198026e-06,
"loss": 0.6112,
"step": 8918
},
{
"epoch": 0.57,
"grad_norm": 0.8482996225357056,
"learning_rate": 4.193010294917624e-06,
"loss": 0.5382,
"step": 8919
},
{
"epoch": 0.57,
"grad_norm": 0.877569317817688,
"learning_rate": 4.1919977696167515e-06,
"loss": 0.5412,
"step": 8920
},
{
"epoch": 0.57,
"grad_norm": 0.8534306287765503,
"learning_rate": 4.190985278338042e-06,
"loss": 0.5792,
"step": 8921
},
{
"epoch": 0.57,
"grad_norm": 0.9521181583404541,
"learning_rate": 4.189972821124126e-06,
"loss": 0.6252,
"step": 8922
},
{
"epoch": 0.57,
"grad_norm": 0.8283462524414062,
"learning_rate": 4.188960398017638e-06,
"loss": 0.5668,
"step": 8923
},
{
"epoch": 0.57,
"grad_norm": 0.8727411031723022,
"learning_rate": 4.187948009061207e-06,
"loss": 0.5511,
"step": 8924
},
{
"epoch": 0.57,
"grad_norm": 0.8867582678794861,
"learning_rate": 4.186935654297461e-06,
"loss": 0.59,
"step": 8925
},
{
"epoch": 0.57,
"grad_norm": 0.859950602054596,
"learning_rate": 4.1859233337690245e-06,
"loss": 0.5337,
"step": 8926
},
{
"epoch": 0.57,
"grad_norm": 0.9106714129447937,
"learning_rate": 4.1849110475185225e-06,
"loss": 0.5833,
"step": 8927
},
{
"epoch": 0.57,
"grad_norm": 0.9669057130813599,
"learning_rate": 4.183898795588584e-06,
"loss": 0.5751,
"step": 8928
},
{
"epoch": 0.57,
"grad_norm": 0.8599669933319092,
"learning_rate": 4.1828865780218285e-06,
"loss": 0.5511,
"step": 8929
},
{
"epoch": 0.57,
"grad_norm": 0.8547632694244385,
"learning_rate": 4.181874394860875e-06,
"loss": 0.5715,
"step": 8930
},
{
"epoch": 0.57,
"grad_norm": 0.8837994337081909,
"learning_rate": 4.180862246148344e-06,
"loss": 0.5981,
"step": 8931
},
{
"epoch": 0.57,
"grad_norm": 0.8469040989875793,
"learning_rate": 4.1798501319268565e-06,
"loss": 0.5338,
"step": 8932
},
{
"epoch": 0.57,
"grad_norm": 0.9077805876731873,
"learning_rate": 4.178838052239027e-06,
"loss": 0.5999,
"step": 8933
},
{
"epoch": 0.57,
"grad_norm": 0.8736510276794434,
"learning_rate": 4.177826007127468e-06,
"loss": 0.561,
"step": 8934
},
{
"epoch": 0.57,
"grad_norm": 0.8860734701156616,
"learning_rate": 4.176813996634796e-06,
"loss": 0.5776,
"step": 8935
},
{
"epoch": 0.57,
"grad_norm": 0.8876895904541016,
"learning_rate": 4.175802020803624e-06,
"loss": 0.5732,
"step": 8936
},
{
"epoch": 0.57,
"grad_norm": 0.9345043301582336,
"learning_rate": 4.174790079676563e-06,
"loss": 0.5576,
"step": 8937
},
{
"epoch": 0.57,
"grad_norm": 0.8652613162994385,
"learning_rate": 4.173778173296219e-06,
"loss": 0.5698,
"step": 8938
},
{
"epoch": 0.57,
"grad_norm": 0.990037739276886,
"learning_rate": 4.172766301705202e-06,
"loss": 0.6013,
"step": 8939
},
{
"epoch": 0.57,
"grad_norm": 0.9194901585578918,
"learning_rate": 4.171754464946119e-06,
"loss": 0.6357,
"step": 8940
},
{
"epoch": 0.57,
"grad_norm": 0.8597732782363892,
"learning_rate": 4.170742663061575e-06,
"loss": 0.5582,
"step": 8941
},
{
"epoch": 0.57,
"grad_norm": 0.9255541563034058,
"learning_rate": 4.169730896094172e-06,
"loss": 0.5464,
"step": 8942
},
{
"epoch": 0.57,
"grad_norm": 0.9251505732536316,
"learning_rate": 4.1687191640865135e-06,
"loss": 0.5523,
"step": 8943
},
{
"epoch": 0.57,
"grad_norm": 0.8354572653770447,
"learning_rate": 4.167707467081197e-06,
"loss": 0.5892,
"step": 8944
},
{
"epoch": 0.57,
"grad_norm": 0.9447482228279114,
"learning_rate": 4.166695805120825e-06,
"loss": 0.7007,
"step": 8945
},
{
"epoch": 0.57,
"grad_norm": 0.8327589631080627,
"learning_rate": 4.165684178247993e-06,
"loss": 0.5409,
"step": 8946
},
{
"epoch": 0.57,
"grad_norm": 0.9203556180000305,
"learning_rate": 4.1646725865053005e-06,
"loss": 0.6141,
"step": 8947
},
{
"epoch": 0.57,
"grad_norm": 0.9170238375663757,
"learning_rate": 4.163661029935336e-06,
"loss": 0.545,
"step": 8948
},
{
"epoch": 0.57,
"grad_norm": 0.8779581785202026,
"learning_rate": 4.162649508580698e-06,
"loss": 0.5778,
"step": 8949
},
{
"epoch": 0.57,
"grad_norm": 0.8849088549613953,
"learning_rate": 4.161638022483976e-06,
"loss": 0.5548,
"step": 8950
},
{
"epoch": 0.57,
"grad_norm": 0.8839111924171448,
"learning_rate": 4.160626571687761e-06,
"loss": 0.5711,
"step": 8951
},
{
"epoch": 0.57,
"grad_norm": 0.8382406234741211,
"learning_rate": 4.159615156234639e-06,
"loss": 0.5591,
"step": 8952
},
{
"epoch": 0.57,
"grad_norm": 0.8632530570030212,
"learning_rate": 4.158603776167201e-06,
"loss": 0.5473,
"step": 8953
},
{
"epoch": 0.57,
"grad_norm": 0.8566288352012634,
"learning_rate": 4.157592431528031e-06,
"loss": 0.5317,
"step": 8954
},
{
"epoch": 0.57,
"grad_norm": 0.8643941283226013,
"learning_rate": 4.156581122359714e-06,
"loss": 0.6105,
"step": 8955
},
{
"epoch": 0.57,
"grad_norm": 0.8673588633537292,
"learning_rate": 4.15556984870483e-06,
"loss": 0.5787,
"step": 8956
},
{
"epoch": 0.57,
"grad_norm": 0.8547856211662292,
"learning_rate": 4.1545586106059636e-06,
"loss": 0.5776,
"step": 8957
},
{
"epoch": 0.57,
"grad_norm": 0.8713629841804504,
"learning_rate": 4.153547408105691e-06,
"loss": 0.605,
"step": 8958
},
{
"epoch": 0.57,
"grad_norm": 0.8869353532791138,
"learning_rate": 4.152536241246595e-06,
"loss": 0.5782,
"step": 8959
},
{
"epoch": 0.57,
"grad_norm": 0.9460669755935669,
"learning_rate": 4.151525110071248e-06,
"loss": 0.5934,
"step": 8960
},
{
"epoch": 0.57,
"grad_norm": 0.8945161700248718,
"learning_rate": 4.1505140146222276e-06,
"loss": 0.6079,
"step": 8961
},
{
"epoch": 0.57,
"grad_norm": 0.8501721024513245,
"learning_rate": 4.149502954942107e-06,
"loss": 0.58,
"step": 8962
},
{
"epoch": 0.57,
"grad_norm": 0.8856709599494934,
"learning_rate": 4.148491931073459e-06,
"loss": 0.5962,
"step": 8963
},
{
"epoch": 0.57,
"grad_norm": 0.8691068887710571,
"learning_rate": 4.147480943058852e-06,
"loss": 0.6176,
"step": 8964
},
{
"epoch": 0.57,
"grad_norm": 0.9234523177146912,
"learning_rate": 4.146469990940858e-06,
"loss": 0.5897,
"step": 8965
},
{
"epoch": 0.57,
"grad_norm": 0.8816432356834412,
"learning_rate": 4.1454590747620424e-06,
"loss": 0.6297,
"step": 8966
},
{
"epoch": 0.57,
"grad_norm": 0.8956805467605591,
"learning_rate": 4.144448194564973e-06,
"loss": 0.5716,
"step": 8967
},
{
"epoch": 0.57,
"grad_norm": 0.9496785998344421,
"learning_rate": 4.1434373503922145e-06,
"loss": 0.5964,
"step": 8968
},
{
"epoch": 0.57,
"grad_norm": 0.8747205138206482,
"learning_rate": 4.142426542286329e-06,
"loss": 0.6098,
"step": 8969
},
{
"epoch": 0.57,
"grad_norm": 0.9270417094230652,
"learning_rate": 4.141415770289877e-06,
"loss": 0.5876,
"step": 8970
},
{
"epoch": 0.57,
"grad_norm": 0.8411609530448914,
"learning_rate": 4.140405034445423e-06,
"loss": 0.5882,
"step": 8971
},
{
"epoch": 0.57,
"grad_norm": 0.926416277885437,
"learning_rate": 4.13939433479552e-06,
"loss": 0.5833,
"step": 8972
},
{
"epoch": 0.57,
"grad_norm": 0.8865971565246582,
"learning_rate": 4.13838367138273e-06,
"loss": 0.5632,
"step": 8973
},
{
"epoch": 0.57,
"grad_norm": 0.9375487565994263,
"learning_rate": 4.137373044249604e-06,
"loss": 0.5751,
"step": 8974
},
{
"epoch": 0.57,
"grad_norm": 0.9372237324714661,
"learning_rate": 4.1363624534387e-06,
"loss": 0.5975,
"step": 8975
},
{
"epoch": 0.57,
"grad_norm": 0.8482964634895325,
"learning_rate": 4.135351898992568e-06,
"loss": 0.5627,
"step": 8976
},
{
"epoch": 0.57,
"grad_norm": 0.9388363361358643,
"learning_rate": 4.134341380953761e-06,
"loss": 0.5835,
"step": 8977
},
{
"epoch": 0.57,
"grad_norm": 0.8574067950248718,
"learning_rate": 4.133330899364824e-06,
"loss": 0.5772,
"step": 8978
},
{
"epoch": 0.57,
"grad_norm": 0.907927393913269,
"learning_rate": 4.1323204542683105e-06,
"loss": 0.6602,
"step": 8979
},
{
"epoch": 0.57,
"grad_norm": 0.926572322845459,
"learning_rate": 4.131310045706763e-06,
"loss": 0.6016,
"step": 8980
},
{
"epoch": 0.57,
"grad_norm": 0.9103202223777771,
"learning_rate": 4.130299673722729e-06,
"loss": 0.6473,
"step": 8981
},
{
"epoch": 0.57,
"grad_norm": 0.9199764728546143,
"learning_rate": 4.129289338358748e-06,
"loss": 0.5697,
"step": 8982
},
{
"epoch": 0.57,
"grad_norm": 0.904728889465332,
"learning_rate": 4.128279039657366e-06,
"loss": 0.627,
"step": 8983
},
{
"epoch": 0.57,
"grad_norm": 0.8930543065071106,
"learning_rate": 4.127268777661119e-06,
"loss": 0.6144,
"step": 8984
},
{
"epoch": 0.57,
"grad_norm": 0.8956807255744934,
"learning_rate": 4.126258552412551e-06,
"loss": 0.5895,
"step": 8985
},
{
"epoch": 0.57,
"grad_norm": 0.8710659742355347,
"learning_rate": 4.125248363954192e-06,
"loss": 0.5893,
"step": 8986
},
{
"epoch": 0.57,
"grad_norm": 0.852942943572998,
"learning_rate": 4.124238212328585e-06,
"loss": 0.5658,
"step": 8987
},
{
"epoch": 0.57,
"grad_norm": 0.9231775403022766,
"learning_rate": 4.123228097578258e-06,
"loss": 0.6293,
"step": 8988
},
{
"epoch": 0.57,
"grad_norm": 0.9329462647438049,
"learning_rate": 4.122218019745748e-06,
"loss": 0.6025,
"step": 8989
},
{
"epoch": 0.57,
"grad_norm": 0.9070497751235962,
"learning_rate": 4.121207978873582e-06,
"loss": 0.537,
"step": 8990
},
{
"epoch": 0.57,
"grad_norm": 0.8512255549430847,
"learning_rate": 4.12019797500429e-06,
"loss": 0.5603,
"step": 8991
},
{
"epoch": 0.57,
"grad_norm": 0.9133707880973816,
"learning_rate": 4.119188008180401e-06,
"loss": 0.5676,
"step": 8992
},
{
"epoch": 0.57,
"grad_norm": 0.9240803122520447,
"learning_rate": 4.118178078444442e-06,
"loss": 0.6035,
"step": 8993
},
{
"epoch": 0.57,
"grad_norm": 0.951643705368042,
"learning_rate": 4.117168185838936e-06,
"loss": 0.5913,
"step": 8994
},
{
"epoch": 0.57,
"grad_norm": 0.8662564754486084,
"learning_rate": 4.1161583304064055e-06,
"loss": 0.5592,
"step": 8995
},
{
"epoch": 0.57,
"grad_norm": 0.8806678056716919,
"learning_rate": 4.115148512189374e-06,
"loss": 0.621,
"step": 8996
},
{
"epoch": 0.57,
"grad_norm": 0.9231857657432556,
"learning_rate": 4.114138731230362e-06,
"loss": 0.6269,
"step": 8997
},
{
"epoch": 0.57,
"grad_norm": 0.8965012431144714,
"learning_rate": 4.113128987571885e-06,
"loss": 0.6336,
"step": 8998
},
{
"epoch": 0.57,
"grad_norm": 0.8867535591125488,
"learning_rate": 4.1121192812564595e-06,
"loss": 0.5398,
"step": 8999
},
{
"epoch": 0.57,
"grad_norm": 0.9486203789710999,
"learning_rate": 4.111109612326603e-06,
"loss": 0.6183,
"step": 9000
},
{
"epoch": 0.57,
"grad_norm": 0.9139353632926941,
"learning_rate": 4.110099980824831e-06,
"loss": 0.5937,
"step": 9001
},
{
"epoch": 0.57,
"grad_norm": 0.8802381753921509,
"learning_rate": 4.109090386793652e-06,
"loss": 0.56,
"step": 9002
},
{
"epoch": 0.57,
"grad_norm": 0.880913496017456,
"learning_rate": 4.108080830275576e-06,
"loss": 0.6136,
"step": 9003
},
{
"epoch": 0.57,
"grad_norm": 0.9269407987594604,
"learning_rate": 4.107071311313113e-06,
"loss": 0.5653,
"step": 9004
},
{
"epoch": 0.57,
"grad_norm": 0.9228689670562744,
"learning_rate": 4.106061829948773e-06,
"loss": 0.6108,
"step": 9005
},
{
"epoch": 0.57,
"grad_norm": 0.921231746673584,
"learning_rate": 4.10505238622506e-06,
"loss": 0.5842,
"step": 9006
},
{
"epoch": 0.57,
"grad_norm": 0.8868432641029358,
"learning_rate": 4.104042980184476e-06,
"loss": 0.6736,
"step": 9007
},
{
"epoch": 0.57,
"grad_norm": 0.8439784646034241,
"learning_rate": 4.103033611869525e-06,
"loss": 0.5566,
"step": 9008
},
{
"epoch": 0.57,
"grad_norm": 0.8885878920555115,
"learning_rate": 4.1020242813227096e-06,
"loss": 0.5977,
"step": 9009
},
{
"epoch": 0.57,
"grad_norm": 0.8744617700576782,
"learning_rate": 4.101014988586528e-06,
"loss": 0.5894,
"step": 9010
},
{
"epoch": 0.57,
"grad_norm": 0.8461993932723999,
"learning_rate": 4.100005733703477e-06,
"loss": 0.5497,
"step": 9011
},
{
"epoch": 0.57,
"grad_norm": 0.8714662790298462,
"learning_rate": 4.0989965167160526e-06,
"loss": 0.5959,
"step": 9012
},
{
"epoch": 0.57,
"grad_norm": 0.883986234664917,
"learning_rate": 4.097987337666753e-06,
"loss": 0.5854,
"step": 9013
},
{
"epoch": 0.57,
"grad_norm": 0.8724504113197327,
"learning_rate": 4.096978196598068e-06,
"loss": 0.5916,
"step": 9014
},
{
"epoch": 0.57,
"grad_norm": 0.9002840518951416,
"learning_rate": 4.09596909355249e-06,
"loss": 0.5975,
"step": 9015
},
{
"epoch": 0.57,
"grad_norm": 0.8268336057662964,
"learning_rate": 4.094960028572506e-06,
"loss": 0.5894,
"step": 9016
},
{
"epoch": 0.57,
"grad_norm": 0.8918128609657288,
"learning_rate": 4.0939510017006095e-06,
"loss": 0.5321,
"step": 9017
},
{
"epoch": 0.57,
"grad_norm": 0.8402930498123169,
"learning_rate": 4.092942012979285e-06,
"loss": 0.5594,
"step": 9018
},
{
"epoch": 0.57,
"grad_norm": 0.9471001029014587,
"learning_rate": 4.091933062451015e-06,
"loss": 0.5805,
"step": 9019
},
{
"epoch": 0.57,
"grad_norm": 0.9170734286308289,
"learning_rate": 4.0909241501582865e-06,
"loss": 0.6064,
"step": 9020
},
{
"epoch": 0.57,
"grad_norm": 0.9813190698623657,
"learning_rate": 4.089915276143577e-06,
"loss": 0.6328,
"step": 9021
},
{
"epoch": 0.57,
"grad_norm": 0.9079948663711548,
"learning_rate": 4.088906440449371e-06,
"loss": 0.5542,
"step": 9022
},
{
"epoch": 0.57,
"grad_norm": 0.855984091758728,
"learning_rate": 4.087897643118145e-06,
"loss": 0.5719,
"step": 9023
},
{
"epoch": 0.57,
"grad_norm": 0.8991562724113464,
"learning_rate": 4.086888884192377e-06,
"loss": 0.5942,
"step": 9024
},
{
"epoch": 0.57,
"grad_norm": 0.9502757787704468,
"learning_rate": 4.0858801637145395e-06,
"loss": 0.5597,
"step": 9025
},
{
"epoch": 0.57,
"grad_norm": 0.910291314125061,
"learning_rate": 4.084871481727111e-06,
"loss": 0.5413,
"step": 9026
},
{
"epoch": 0.57,
"grad_norm": 0.8761973977088928,
"learning_rate": 4.083862838272559e-06,
"loss": 0.588,
"step": 9027
},
{
"epoch": 0.57,
"grad_norm": 0.935142993927002,
"learning_rate": 4.082854233393358e-06,
"loss": 0.5596,
"step": 9028
},
{
"epoch": 0.57,
"grad_norm": 0.9134296178817749,
"learning_rate": 4.081845667131971e-06,
"loss": 0.6049,
"step": 9029
},
{
"epoch": 0.57,
"grad_norm": 0.9424194693565369,
"learning_rate": 4.080837139530872e-06,
"loss": 0.5892,
"step": 9030
},
{
"epoch": 0.57,
"grad_norm": 0.8782743215560913,
"learning_rate": 4.0798286506325225e-06,
"loss": 0.5253,
"step": 9031
},
{
"epoch": 0.57,
"grad_norm": 0.8680989742279053,
"learning_rate": 4.078820200479389e-06,
"loss": 0.588,
"step": 9032
},
{
"epoch": 0.57,
"grad_norm": 0.905407726764679,
"learning_rate": 4.077811789113929e-06,
"loss": 0.6169,
"step": 9033
},
{
"epoch": 0.57,
"grad_norm": 0.9795319437980652,
"learning_rate": 4.076803416578608e-06,
"loss": 0.627,
"step": 9034
},
{
"epoch": 0.57,
"grad_norm": 0.8296229243278503,
"learning_rate": 4.0757950829158855e-06,
"loss": 0.5553,
"step": 9035
},
{
"epoch": 0.57,
"grad_norm": 0.8955538272857666,
"learning_rate": 4.074786788168216e-06,
"loss": 0.6238,
"step": 9036
},
{
"epoch": 0.57,
"grad_norm": 0.8968479037284851,
"learning_rate": 4.073778532378056e-06,
"loss": 0.619,
"step": 9037
},
{
"epoch": 0.57,
"grad_norm": 0.9429267644882202,
"learning_rate": 4.072770315587858e-06,
"loss": 0.5866,
"step": 9038
},
{
"epoch": 0.57,
"grad_norm": 0.8396599292755127,
"learning_rate": 4.071762137840079e-06,
"loss": 0.5358,
"step": 9039
},
{
"epoch": 0.57,
"grad_norm": 0.8964661955833435,
"learning_rate": 4.070753999177167e-06,
"loss": 0.5945,
"step": 9040
},
{
"epoch": 0.57,
"grad_norm": 0.8709607720375061,
"learning_rate": 4.069745899641571e-06,
"loss": 0.5417,
"step": 9041
},
{
"epoch": 0.57,
"grad_norm": 0.9348841309547424,
"learning_rate": 4.0687378392757374e-06,
"loss": 0.6048,
"step": 9042
},
{
"epoch": 0.57,
"grad_norm": 0.8811603784561157,
"learning_rate": 4.0677298181221155e-06,
"loss": 0.5896,
"step": 9043
},
{
"epoch": 0.57,
"grad_norm": 0.8758918046951294,
"learning_rate": 4.066721836223149e-06,
"loss": 0.5902,
"step": 9044
},
{
"epoch": 0.57,
"grad_norm": 0.9369110465049744,
"learning_rate": 4.065713893621278e-06,
"loss": 0.6596,
"step": 9045
},
{
"epoch": 0.57,
"grad_norm": 0.8938471674919128,
"learning_rate": 4.064705990358943e-06,
"loss": 0.5437,
"step": 9046
},
{
"epoch": 0.57,
"grad_norm": 0.8944480419158936,
"learning_rate": 4.063698126478587e-06,
"loss": 0.5737,
"step": 9047
},
{
"epoch": 0.57,
"grad_norm": 0.9232917428016663,
"learning_rate": 4.062690302022647e-06,
"loss": 0.5618,
"step": 9048
},
{
"epoch": 0.57,
"grad_norm": 0.8441494107246399,
"learning_rate": 4.0616825170335565e-06,
"loss": 0.5753,
"step": 9049
},
{
"epoch": 0.57,
"grad_norm": 0.8715497255325317,
"learning_rate": 4.060674771553751e-06,
"loss": 0.5928,
"step": 9050
},
{
"epoch": 0.57,
"grad_norm": 0.863179087638855,
"learning_rate": 4.059667065625662e-06,
"loss": 0.5379,
"step": 9051
},
{
"epoch": 0.57,
"grad_norm": 0.8319960236549377,
"learning_rate": 4.058659399291724e-06,
"loss": 0.5393,
"step": 9052
},
{
"epoch": 0.57,
"grad_norm": 0.9125126004219055,
"learning_rate": 4.057651772594362e-06,
"loss": 0.6624,
"step": 9053
},
{
"epoch": 0.57,
"grad_norm": 0.8907890915870667,
"learning_rate": 4.056644185576007e-06,
"loss": 0.6439,
"step": 9054
},
{
"epoch": 0.57,
"grad_norm": 0.8446599841117859,
"learning_rate": 4.055636638279082e-06,
"loss": 0.537,
"step": 9055
},
{
"epoch": 0.57,
"grad_norm": 0.9353048801422119,
"learning_rate": 4.054629130746015e-06,
"loss": 0.6079,
"step": 9056
},
{
"epoch": 0.57,
"grad_norm": 0.9255784749984741,
"learning_rate": 4.053621663019225e-06,
"loss": 0.5508,
"step": 9057
},
{
"epoch": 0.57,
"grad_norm": 0.8582807183265686,
"learning_rate": 4.052614235141136e-06,
"loss": 0.5567,
"step": 9058
},
{
"epoch": 0.57,
"grad_norm": 0.8390825390815735,
"learning_rate": 4.051606847154164e-06,
"loss": 0.5467,
"step": 9059
},
{
"epoch": 0.57,
"grad_norm": 0.8816949129104614,
"learning_rate": 4.05059949910073e-06,
"loss": 0.6016,
"step": 9060
},
{
"epoch": 0.57,
"grad_norm": 0.875059187412262,
"learning_rate": 4.049592191023247e-06,
"loss": 0.5172,
"step": 9061
},
{
"epoch": 0.57,
"grad_norm": 0.9037113189697266,
"learning_rate": 4.0485849229641325e-06,
"loss": 0.5985,
"step": 9062
},
{
"epoch": 0.57,
"grad_norm": 0.8470078110694885,
"learning_rate": 4.047577694965794e-06,
"loss": 0.5989,
"step": 9063
},
{
"epoch": 0.57,
"grad_norm": 0.9105969071388245,
"learning_rate": 4.046570507070649e-06,
"loss": 0.6467,
"step": 9064
},
{
"epoch": 0.57,
"grad_norm": 0.8537124395370483,
"learning_rate": 4.045563359321102e-06,
"loss": 0.553,
"step": 9065
},
{
"epoch": 0.57,
"grad_norm": 0.917719304561615,
"learning_rate": 4.044556251759562e-06,
"loss": 0.6008,
"step": 9066
},
{
"epoch": 0.57,
"grad_norm": 0.8611663579940796,
"learning_rate": 4.043549184428434e-06,
"loss": 0.582,
"step": 9067
},
{
"epoch": 0.57,
"grad_norm": 0.9147241115570068,
"learning_rate": 4.042542157370122e-06,
"loss": 0.6259,
"step": 9068
},
{
"epoch": 0.57,
"grad_norm": 0.9152358770370483,
"learning_rate": 4.041535170627029e-06,
"loss": 0.5821,
"step": 9069
},
{
"epoch": 0.57,
"grad_norm": 0.8824336528778076,
"learning_rate": 4.040528224241558e-06,
"loss": 0.5594,
"step": 9070
},
{
"epoch": 0.57,
"grad_norm": 0.805570662021637,
"learning_rate": 4.039521318256104e-06,
"loss": 0.4743,
"step": 9071
},
{
"epoch": 0.57,
"grad_norm": 0.9145142436027527,
"learning_rate": 4.038514452713065e-06,
"loss": 0.569,
"step": 9072
},
{
"epoch": 0.57,
"grad_norm": 0.8494529128074646,
"learning_rate": 4.037507627654838e-06,
"loss": 0.5572,
"step": 9073
},
{
"epoch": 0.57,
"grad_norm": 0.897002637386322,
"learning_rate": 4.0365008431238184e-06,
"loss": 0.6247,
"step": 9074
},
{
"epoch": 0.57,
"grad_norm": 0.9091158509254456,
"learning_rate": 4.035494099162396e-06,
"loss": 0.6193,
"step": 9075
},
{
"epoch": 0.58,
"grad_norm": 0.8396137952804565,
"learning_rate": 4.03448739581296e-06,
"loss": 0.5578,
"step": 9076
},
{
"epoch": 0.58,
"grad_norm": 0.8554840683937073,
"learning_rate": 4.033480733117902e-06,
"loss": 0.5752,
"step": 9077
},
{
"epoch": 0.58,
"grad_norm": 0.8953068852424622,
"learning_rate": 4.032474111119609e-06,
"loss": 0.6019,
"step": 9078
},
{
"epoch": 0.58,
"grad_norm": 0.9367779493331909,
"learning_rate": 4.031467529860466e-06,
"loss": 0.6226,
"step": 9079
},
{
"epoch": 0.58,
"grad_norm": 0.8574473857879639,
"learning_rate": 4.030460989382853e-06,
"loss": 0.5676,
"step": 9080
},
{
"epoch": 0.58,
"grad_norm": 0.8605020046234131,
"learning_rate": 4.029454489729156e-06,
"loss": 0.5517,
"step": 9081
},
{
"epoch": 0.58,
"grad_norm": 0.890446126461029,
"learning_rate": 4.028448030941756e-06,
"loss": 0.5869,
"step": 9082
},
{
"epoch": 0.58,
"grad_norm": 0.8330382704734802,
"learning_rate": 4.027441613063029e-06,
"loss": 0.582,
"step": 9083
},
{
"epoch": 0.58,
"grad_norm": 0.8821123242378235,
"learning_rate": 4.026435236135351e-06,
"loss": 0.6191,
"step": 9084
},
{
"epoch": 0.58,
"grad_norm": 0.9941993355751038,
"learning_rate": 4.025428900201098e-06,
"loss": 0.6336,
"step": 9085
},
{
"epoch": 0.58,
"grad_norm": 0.9467496275901794,
"learning_rate": 4.024422605302646e-06,
"loss": 0.629,
"step": 9086
},
{
"epoch": 0.58,
"grad_norm": 0.8554012179374695,
"learning_rate": 4.023416351482364e-06,
"loss": 0.533,
"step": 9087
},
{
"epoch": 0.58,
"grad_norm": 0.8936252593994141,
"learning_rate": 4.022410138782621e-06,
"loss": 0.6261,
"step": 9088
},
{
"epoch": 0.58,
"grad_norm": 0.8527365922927856,
"learning_rate": 4.021403967245786e-06,
"loss": 0.54,
"step": 9089
},
{
"epoch": 0.58,
"grad_norm": 0.8804008364677429,
"learning_rate": 4.020397836914227e-06,
"loss": 0.5582,
"step": 9090
},
{
"epoch": 0.58,
"grad_norm": 0.8681939840316772,
"learning_rate": 4.019391747830307e-06,
"loss": 0.567,
"step": 9091
},
{
"epoch": 0.58,
"grad_norm": 0.9508828520774841,
"learning_rate": 4.018385700036389e-06,
"loss": 0.611,
"step": 9092
},
{
"epoch": 0.58,
"grad_norm": 0.8974758982658386,
"learning_rate": 4.017379693574833e-06,
"loss": 0.5596,
"step": 9093
},
{
"epoch": 0.58,
"grad_norm": 0.8851727247238159,
"learning_rate": 4.016373728488002e-06,
"loss": 0.6184,
"step": 9094
},
{
"epoch": 0.58,
"grad_norm": 0.8081380724906921,
"learning_rate": 4.01536780481825e-06,
"loss": 0.5798,
"step": 9095
},
{
"epoch": 0.58,
"grad_norm": 0.8617830276489258,
"learning_rate": 4.014361922607936e-06,
"loss": 0.6021,
"step": 9096
},
{
"epoch": 0.58,
"grad_norm": 0.895240306854248,
"learning_rate": 4.013356081899412e-06,
"loss": 0.5756,
"step": 9097
},
{
"epoch": 0.58,
"grad_norm": 0.8988040089607239,
"learning_rate": 4.0123502827350295e-06,
"loss": 0.5726,
"step": 9098
},
{
"epoch": 0.58,
"grad_norm": 0.932150661945343,
"learning_rate": 4.011344525157141e-06,
"loss": 0.6415,
"step": 9099
},
{
"epoch": 0.58,
"grad_norm": 0.9083916544914246,
"learning_rate": 4.010338809208098e-06,
"loss": 0.6164,
"step": 9100
},
{
"epoch": 0.58,
"grad_norm": 0.8748500943183899,
"learning_rate": 4.009333134930244e-06,
"loss": 0.5558,
"step": 9101
},
{
"epoch": 0.58,
"grad_norm": 0.905542254447937,
"learning_rate": 4.0083275023659236e-06,
"loss": 0.6417,
"step": 9102
},
{
"epoch": 0.58,
"grad_norm": 0.8471083045005798,
"learning_rate": 4.007321911557483e-06,
"loss": 0.6012,
"step": 9103
},
{
"epoch": 0.58,
"grad_norm": 0.8830850124359131,
"learning_rate": 4.0063163625472645e-06,
"loss": 0.5628,
"step": 9104
},
{
"epoch": 0.58,
"grad_norm": 0.8656706213951111,
"learning_rate": 4.005310855377608e-06,
"loss": 0.6062,
"step": 9105
},
{
"epoch": 0.58,
"grad_norm": 0.8246329426765442,
"learning_rate": 4.004305390090848e-06,
"loss": 0.5715,
"step": 9106
},
{
"epoch": 0.58,
"grad_norm": 0.9124095439910889,
"learning_rate": 4.003299966729325e-06,
"loss": 0.5901,
"step": 9107
},
{
"epoch": 0.58,
"grad_norm": 0.9204335808753967,
"learning_rate": 4.002294585335375e-06,
"loss": 0.5478,
"step": 9108
},
{
"epoch": 0.58,
"grad_norm": 0.9215397834777832,
"learning_rate": 4.001289245951329e-06,
"loss": 0.5763,
"step": 9109
},
{
"epoch": 0.58,
"grad_norm": 0.9767603278160095,
"learning_rate": 4.000283948619517e-06,
"loss": 0.5892,
"step": 9110
},
{
"epoch": 0.58,
"grad_norm": 0.8919650316238403,
"learning_rate": 3.99927869338227e-06,
"loss": 0.551,
"step": 9111
},
{
"epoch": 0.58,
"grad_norm": 0.9372937679290771,
"learning_rate": 3.998273480281919e-06,
"loss": 0.5895,
"step": 9112
},
{
"epoch": 0.58,
"grad_norm": 1.0142635107040405,
"learning_rate": 3.997268309360785e-06,
"loss": 0.5965,
"step": 9113
},
{
"epoch": 0.58,
"grad_norm": 0.8675452470779419,
"learning_rate": 3.996263180661194e-06,
"loss": 0.5961,
"step": 9114
},
{
"epoch": 0.58,
"grad_norm": 0.847707211971283,
"learning_rate": 3.995258094225468e-06,
"loss": 0.5375,
"step": 9115
},
{
"epoch": 0.58,
"grad_norm": 0.8482071161270142,
"learning_rate": 3.99425305009593e-06,
"loss": 0.5372,
"step": 9116
},
{
"epoch": 0.58,
"grad_norm": 0.9109798669815063,
"learning_rate": 3.993248048314897e-06,
"loss": 0.5797,
"step": 9117
},
{
"epoch": 0.58,
"grad_norm": 0.9148001670837402,
"learning_rate": 3.992243088924686e-06,
"loss": 0.575,
"step": 9118
},
{
"epoch": 0.58,
"grad_norm": 0.8979611396789551,
"learning_rate": 3.991238171967612e-06,
"loss": 0.5755,
"step": 9119
},
{
"epoch": 0.58,
"grad_norm": 0.9315516352653503,
"learning_rate": 3.9902332974859906e-06,
"loss": 0.6237,
"step": 9120
},
{
"epoch": 0.58,
"grad_norm": 0.8877137899398804,
"learning_rate": 3.989228465522133e-06,
"loss": 0.6188,
"step": 9121
},
{
"epoch": 0.58,
"grad_norm": 0.9431552886962891,
"learning_rate": 3.9882236761183476e-06,
"loss": 0.6138,
"step": 9122
},
{
"epoch": 0.58,
"grad_norm": 0.877837061882019,
"learning_rate": 3.987218929316942e-06,
"loss": 0.6009,
"step": 9123
},
{
"epoch": 0.58,
"grad_norm": 0.8428844809532166,
"learning_rate": 3.986214225160226e-06,
"loss": 0.565,
"step": 9124
},
{
"epoch": 0.58,
"grad_norm": 0.9297831654548645,
"learning_rate": 3.9852095636905026e-06,
"loss": 0.6212,
"step": 9125
},
{
"epoch": 0.58,
"grad_norm": 0.9488120079040527,
"learning_rate": 3.984204944950073e-06,
"loss": 0.63,
"step": 9126
},
{
"epoch": 0.58,
"grad_norm": 0.9093875288963318,
"learning_rate": 3.983200368981241e-06,
"loss": 0.6243,
"step": 9127
},
{
"epoch": 0.58,
"grad_norm": 0.8893300294876099,
"learning_rate": 3.982195835826302e-06,
"loss": 0.5697,
"step": 9128
},
{
"epoch": 0.58,
"grad_norm": 0.8863883018493652,
"learning_rate": 3.981191345527558e-06,
"loss": 0.6158,
"step": 9129
},
{
"epoch": 0.58,
"grad_norm": 0.98292475938797,
"learning_rate": 3.9801868981273e-06,
"loss": 0.6121,
"step": 9130
},
{
"epoch": 0.58,
"grad_norm": 0.8693172335624695,
"learning_rate": 3.979182493667826e-06,
"loss": 0.5718,
"step": 9131
},
{
"epoch": 0.58,
"grad_norm": 0.9281517863273621,
"learning_rate": 3.978178132191424e-06,
"loss": 0.5888,
"step": 9132
},
{
"epoch": 0.58,
"grad_norm": 0.9215491414070129,
"learning_rate": 3.9771738137403885e-06,
"loss": 0.5927,
"step": 9133
},
{
"epoch": 0.58,
"grad_norm": 0.9008755683898926,
"learning_rate": 3.976169538357004e-06,
"loss": 0.5982,
"step": 9134
},
{
"epoch": 0.58,
"grad_norm": 0.9302978515625,
"learning_rate": 3.97516530608356e-06,
"loss": 0.6214,
"step": 9135
},
{
"epoch": 0.58,
"grad_norm": 0.8978670239448547,
"learning_rate": 3.974161116962337e-06,
"loss": 0.6003,
"step": 9136
},
{
"epoch": 0.58,
"grad_norm": 0.851223886013031,
"learning_rate": 3.973156971035623e-06,
"loss": 0.5936,
"step": 9137
},
{
"epoch": 0.58,
"grad_norm": 0.8626120686531067,
"learning_rate": 3.9721528683456966e-06,
"loss": 0.5732,
"step": 9138
},
{
"epoch": 0.58,
"grad_norm": 0.9312442541122437,
"learning_rate": 3.971148808934838e-06,
"loss": 0.6243,
"step": 9139
},
{
"epoch": 0.58,
"grad_norm": 0.8899321556091309,
"learning_rate": 3.970144792845322e-06,
"loss": 0.5334,
"step": 9140
},
{
"epoch": 0.58,
"grad_norm": 0.8440714478492737,
"learning_rate": 3.9691408201194275e-06,
"loss": 0.6048,
"step": 9141
},
{
"epoch": 0.58,
"grad_norm": 0.8218972086906433,
"learning_rate": 3.968136890799426e-06,
"loss": 0.5506,
"step": 9142
},
{
"epoch": 0.58,
"grad_norm": 0.8766604661941528,
"learning_rate": 3.967133004927592e-06,
"loss": 0.5832,
"step": 9143
},
{
"epoch": 0.58,
"grad_norm": 0.9405858516693115,
"learning_rate": 3.9661291625461945e-06,
"loss": 0.6241,
"step": 9144
},
{
"epoch": 0.58,
"grad_norm": 0.8780211806297302,
"learning_rate": 3.965125363697499e-06,
"loss": 0.599,
"step": 9145
},
{
"epoch": 0.58,
"grad_norm": 0.8795492649078369,
"learning_rate": 3.964121608423775e-06,
"loss": 0.6152,
"step": 9146
},
{
"epoch": 0.58,
"grad_norm": 0.8778110146522522,
"learning_rate": 3.963117896767288e-06,
"loss": 0.6185,
"step": 9147
},
{
"epoch": 0.58,
"grad_norm": 0.8248224258422852,
"learning_rate": 3.962114228770299e-06,
"loss": 0.5828,
"step": 9148
},
{
"epoch": 0.58,
"grad_norm": 0.8475858569145203,
"learning_rate": 3.961110604475067e-06,
"loss": 0.5455,
"step": 9149
},
{
"epoch": 0.58,
"grad_norm": 0.8509166240692139,
"learning_rate": 3.960107023923855e-06,
"loss": 0.6004,
"step": 9150
},
{
"epoch": 0.58,
"grad_norm": 0.8684119582176208,
"learning_rate": 3.959103487158919e-06,
"loss": 0.5847,
"step": 9151
},
{
"epoch": 0.58,
"grad_norm": 0.799341082572937,
"learning_rate": 3.958099994222515e-06,
"loss": 0.5715,
"step": 9152
},
{
"epoch": 0.58,
"grad_norm": 0.8934925198554993,
"learning_rate": 3.957096545156893e-06,
"loss": 0.6094,
"step": 9153
},
{
"epoch": 0.58,
"grad_norm": 0.9568246603012085,
"learning_rate": 3.956093140004308e-06,
"loss": 0.5888,
"step": 9154
},
{
"epoch": 0.58,
"grad_norm": 0.9072986245155334,
"learning_rate": 3.955089778807012e-06,
"loss": 0.597,
"step": 9155
},
{
"epoch": 0.58,
"grad_norm": 0.8377887010574341,
"learning_rate": 3.954086461607248e-06,
"loss": 0.5394,
"step": 9156
},
{
"epoch": 0.58,
"grad_norm": 0.8361679315567017,
"learning_rate": 3.9530831884472655e-06,
"loss": 0.5454,
"step": 9157
},
{
"epoch": 0.58,
"grad_norm": 0.9167816638946533,
"learning_rate": 3.952079959369308e-06,
"loss": 0.5748,
"step": 9158
},
{
"epoch": 0.58,
"grad_norm": 1.0050288438796997,
"learning_rate": 3.951076774415619e-06,
"loss": 0.603,
"step": 9159
},
{
"epoch": 0.58,
"grad_norm": 0.8782682418823242,
"learning_rate": 3.950073633628436e-06,
"loss": 0.6133,
"step": 9160
},
{
"epoch": 0.58,
"grad_norm": 0.8905148506164551,
"learning_rate": 3.949070537050002e-06,
"loss": 0.6327,
"step": 9161
},
{
"epoch": 0.58,
"grad_norm": 0.9059675335884094,
"learning_rate": 3.948067484722549e-06,
"loss": 0.638,
"step": 9162
},
{
"epoch": 0.58,
"grad_norm": 0.8948028087615967,
"learning_rate": 3.947064476688318e-06,
"loss": 0.5723,
"step": 9163
},
{
"epoch": 0.58,
"grad_norm": 0.9257702231407166,
"learning_rate": 3.946061512989537e-06,
"loss": 0.6301,
"step": 9164
},
{
"epoch": 0.58,
"grad_norm": 0.8525533676147461,
"learning_rate": 3.94505859366844e-06,
"loss": 0.5937,
"step": 9165
},
{
"epoch": 0.58,
"grad_norm": 0.8956154584884644,
"learning_rate": 3.944055718767255e-06,
"loss": 0.5908,
"step": 9166
},
{
"epoch": 0.58,
"grad_norm": 0.8894206881523132,
"learning_rate": 3.943052888328211e-06,
"loss": 0.5536,
"step": 9167
},
{
"epoch": 0.58,
"grad_norm": 0.8910138010978699,
"learning_rate": 3.942050102393533e-06,
"loss": 0.6453,
"step": 9168
},
{
"epoch": 0.58,
"grad_norm": 0.8522058129310608,
"learning_rate": 3.941047361005445e-06,
"loss": 0.5562,
"step": 9169
},
{
"epoch": 0.58,
"grad_norm": 0.9396253228187561,
"learning_rate": 3.940044664206168e-06,
"loss": 0.6234,
"step": 9170
},
{
"epoch": 0.58,
"grad_norm": 0.914997398853302,
"learning_rate": 3.939042012037924e-06,
"loss": 0.5733,
"step": 9171
},
{
"epoch": 0.58,
"grad_norm": 0.8962453603744507,
"learning_rate": 3.938039404542929e-06,
"loss": 0.5559,
"step": 9172
},
{
"epoch": 0.58,
"grad_norm": 0.9689484238624573,
"learning_rate": 3.937036841763401e-06,
"loss": 0.5797,
"step": 9173
},
{
"epoch": 0.58,
"grad_norm": 0.8645898103713989,
"learning_rate": 3.936034323741555e-06,
"loss": 0.5752,
"step": 9174
},
{
"epoch": 0.58,
"grad_norm": 0.9438555240631104,
"learning_rate": 3.935031850519599e-06,
"loss": 0.5716,
"step": 9175
},
{
"epoch": 0.58,
"grad_norm": 1.1834338903427124,
"learning_rate": 3.934029422139749e-06,
"loss": 0.5701,
"step": 9176
},
{
"epoch": 0.58,
"grad_norm": 0.954289972782135,
"learning_rate": 3.933027038644213e-06,
"loss": 0.5488,
"step": 9177
},
{
"epoch": 0.58,
"grad_norm": 0.9095122218132019,
"learning_rate": 3.932024700075196e-06,
"loss": 0.5646,
"step": 9178
},
{
"epoch": 0.58,
"grad_norm": 0.8213743567466736,
"learning_rate": 3.931022406474902e-06,
"loss": 0.5665,
"step": 9179
},
{
"epoch": 0.58,
"grad_norm": 0.9303811192512512,
"learning_rate": 3.930020157885537e-06,
"loss": 0.5638,
"step": 9180
},
{
"epoch": 0.58,
"grad_norm": 0.8715723156929016,
"learning_rate": 3.929017954349301e-06,
"loss": 0.5983,
"step": 9181
},
{
"epoch": 0.58,
"grad_norm": 0.9098017811775208,
"learning_rate": 3.928015795908394e-06,
"loss": 0.566,
"step": 9182
},
{
"epoch": 0.58,
"grad_norm": 0.8678735494613647,
"learning_rate": 3.927013682605011e-06,
"loss": 0.5558,
"step": 9183
},
{
"epoch": 0.58,
"grad_norm": 0.870380699634552,
"learning_rate": 3.9260116144813495e-06,
"loss": 0.6038,
"step": 9184
},
{
"epoch": 0.58,
"grad_norm": 0.8809983730316162,
"learning_rate": 3.925009591579604e-06,
"loss": 0.5607,
"step": 9185
},
{
"epoch": 0.58,
"grad_norm": 0.8992043137550354,
"learning_rate": 3.9240076139419655e-06,
"loss": 0.5617,
"step": 9186
},
{
"epoch": 0.58,
"grad_norm": 0.884871244430542,
"learning_rate": 3.92300568161062e-06,
"loss": 0.5958,
"step": 9187
},
{
"epoch": 0.58,
"grad_norm": 0.9241304993629456,
"learning_rate": 3.9220037946277606e-06,
"loss": 0.5833,
"step": 9188
},
{
"epoch": 0.58,
"grad_norm": 0.837876558303833,
"learning_rate": 3.921001953035573e-06,
"loss": 0.5648,
"step": 9189
},
{
"epoch": 0.58,
"grad_norm": 0.8822311162948608,
"learning_rate": 3.920000156876238e-06,
"loss": 0.5887,
"step": 9190
},
{
"epoch": 0.58,
"grad_norm": 0.8584680557250977,
"learning_rate": 3.91899840619194e-06,
"loss": 0.5667,
"step": 9191
},
{
"epoch": 0.58,
"grad_norm": 0.9705455303192139,
"learning_rate": 3.9179967010248556e-06,
"loss": 0.6367,
"step": 9192
},
{
"epoch": 0.58,
"grad_norm": 0.8703861236572266,
"learning_rate": 3.91699504141717e-06,
"loss": 0.5526,
"step": 9193
},
{
"epoch": 0.58,
"grad_norm": 0.8837298154830933,
"learning_rate": 3.915993427411054e-06,
"loss": 0.6264,
"step": 9194
},
{
"epoch": 0.58,
"grad_norm": 0.9295274615287781,
"learning_rate": 3.914991859048684e-06,
"loss": 0.5771,
"step": 9195
},
{
"epoch": 0.58,
"grad_norm": 0.8776589035987854,
"learning_rate": 3.913990336372231e-06,
"loss": 0.562,
"step": 9196
},
{
"epoch": 0.58,
"grad_norm": 0.9219998717308044,
"learning_rate": 3.912988859423869e-06,
"loss": 0.5872,
"step": 9197
},
{
"epoch": 0.58,
"grad_norm": 0.9244682788848877,
"learning_rate": 3.911987428245765e-06,
"loss": 0.5853,
"step": 9198
},
{
"epoch": 0.58,
"grad_norm": 0.908510148525238,
"learning_rate": 3.9109860428800845e-06,
"loss": 0.5842,
"step": 9199
},
{
"epoch": 0.58,
"grad_norm": 0.8179001212120056,
"learning_rate": 3.909984703368992e-06,
"loss": 0.5537,
"step": 9200
},
{
"epoch": 0.58,
"grad_norm": 0.8670381903648376,
"learning_rate": 3.9089834097546534e-06,
"loss": 0.5738,
"step": 9201
},
{
"epoch": 0.58,
"grad_norm": 0.9407733678817749,
"learning_rate": 3.907982162079229e-06,
"loss": 0.5843,
"step": 9202
},
{
"epoch": 0.58,
"grad_norm": 0.8975993394851685,
"learning_rate": 3.906980960384875e-06,
"loss": 0.592,
"step": 9203
},
{
"epoch": 0.58,
"grad_norm": 0.9089202880859375,
"learning_rate": 3.90597980471375e-06,
"loss": 0.5626,
"step": 9204
},
{
"epoch": 0.58,
"grad_norm": 0.9043983221054077,
"learning_rate": 3.904978695108011e-06,
"loss": 0.6234,
"step": 9205
},
{
"epoch": 0.58,
"grad_norm": 0.8751869201660156,
"learning_rate": 3.9039776316098104e-06,
"loss": 0.584,
"step": 9206
},
{
"epoch": 0.58,
"grad_norm": 0.9038695693016052,
"learning_rate": 3.902976614261298e-06,
"loss": 0.5378,
"step": 9207
},
{
"epoch": 0.58,
"grad_norm": 0.920074462890625,
"learning_rate": 3.901975643104625e-06,
"loss": 0.5855,
"step": 9208
},
{
"epoch": 0.58,
"grad_norm": 0.8642706871032715,
"learning_rate": 3.9009747181819355e-06,
"loss": 0.5913,
"step": 9209
},
{
"epoch": 0.58,
"grad_norm": 0.9230958223342896,
"learning_rate": 3.8999738395353795e-06,
"loss": 0.6275,
"step": 9210
},
{
"epoch": 0.58,
"grad_norm": 0.8580319285392761,
"learning_rate": 3.898973007207097e-06,
"loss": 0.5848,
"step": 9211
},
{
"epoch": 0.58,
"grad_norm": 0.9039139747619629,
"learning_rate": 3.897972221239233e-06,
"loss": 0.6364,
"step": 9212
},
{
"epoch": 0.58,
"grad_norm": 0.8444435596466064,
"learning_rate": 3.896971481673923e-06,
"loss": 0.5956,
"step": 9213
},
{
"epoch": 0.58,
"grad_norm": 0.832820475101471,
"learning_rate": 3.895970788553308e-06,
"loss": 0.5442,
"step": 9214
},
{
"epoch": 0.58,
"grad_norm": 0.9238991141319275,
"learning_rate": 3.894970141919522e-06,
"loss": 0.5961,
"step": 9215
},
{
"epoch": 0.58,
"grad_norm": 0.8753307461738586,
"learning_rate": 3.8939695418147e-06,
"loss": 0.5827,
"step": 9216
},
{
"epoch": 0.58,
"grad_norm": 0.8677202463150024,
"learning_rate": 3.892968988280971e-06,
"loss": 0.6043,
"step": 9217
},
{
"epoch": 0.58,
"grad_norm": 0.8751778602600098,
"learning_rate": 3.891968481360469e-06,
"loss": 0.6046,
"step": 9218
},
{
"epoch": 0.58,
"grad_norm": 0.8408955931663513,
"learning_rate": 3.890968021095318e-06,
"loss": 0.5691,
"step": 9219
},
{
"epoch": 0.58,
"grad_norm": 0.8872222900390625,
"learning_rate": 3.889967607527648e-06,
"loss": 0.5943,
"step": 9220
},
{
"epoch": 0.58,
"grad_norm": 0.8292039036750793,
"learning_rate": 3.888967240699578e-06,
"loss": 0.5651,
"step": 9221
},
{
"epoch": 0.58,
"grad_norm": 0.8514560461044312,
"learning_rate": 3.887966920653234e-06,
"loss": 0.5667,
"step": 9222
},
{
"epoch": 0.58,
"grad_norm": 0.8210045695304871,
"learning_rate": 3.886966647430733e-06,
"loss": 0.4967,
"step": 9223
},
{
"epoch": 0.58,
"grad_norm": 0.9208805561065674,
"learning_rate": 3.8859664210741965e-06,
"loss": 0.537,
"step": 9224
},
{
"epoch": 0.58,
"grad_norm": 1.0000219345092773,
"learning_rate": 3.884966241625737e-06,
"loss": 0.6296,
"step": 9225
},
{
"epoch": 0.58,
"grad_norm": 0.9203490018844604,
"learning_rate": 3.88396610912747e-06,
"loss": 0.5744,
"step": 9226
},
{
"epoch": 0.58,
"grad_norm": 0.8076592087745667,
"learning_rate": 3.882966023621509e-06,
"loss": 0.5488,
"step": 9227
},
{
"epoch": 0.58,
"grad_norm": 0.8537278771400452,
"learning_rate": 3.881965985149962e-06,
"loss": 0.5721,
"step": 9228
},
{
"epoch": 0.58,
"grad_norm": 0.9415518641471863,
"learning_rate": 3.880965993754939e-06,
"loss": 0.6237,
"step": 9229
},
{
"epoch": 0.58,
"grad_norm": 0.8622970581054688,
"learning_rate": 3.879966049478544e-06,
"loss": 0.5502,
"step": 9230
},
{
"epoch": 0.58,
"grad_norm": 0.9145261645317078,
"learning_rate": 3.878966152362882e-06,
"loss": 0.5996,
"step": 9231
},
{
"epoch": 0.58,
"grad_norm": 0.8953229784965515,
"learning_rate": 3.877966302450057e-06,
"loss": 0.5741,
"step": 9232
},
{
"epoch": 0.58,
"grad_norm": 0.8525993824005127,
"learning_rate": 3.876966499782168e-06,
"loss": 0.5531,
"step": 9233
},
{
"epoch": 0.59,
"grad_norm": 0.8899672627449036,
"learning_rate": 3.875966744401311e-06,
"loss": 0.6231,
"step": 9234
},
{
"epoch": 0.59,
"grad_norm": 0.9033567905426025,
"learning_rate": 3.874967036349585e-06,
"loss": 0.5987,
"step": 9235
},
{
"epoch": 0.59,
"grad_norm": 0.8257200717926025,
"learning_rate": 3.8739673756690845e-06,
"loss": 0.5333,
"step": 9236
},
{
"epoch": 0.59,
"grad_norm": 0.9355111718177795,
"learning_rate": 3.872967762401899e-06,
"loss": 0.5712,
"step": 9237
},
{
"epoch": 0.59,
"grad_norm": 0.9281190037727356,
"learning_rate": 3.8719681965901225e-06,
"loss": 0.6097,
"step": 9238
},
{
"epoch": 0.59,
"grad_norm": 0.8676934242248535,
"learning_rate": 3.870968678275838e-06,
"loss": 0.6332,
"step": 9239
},
{
"epoch": 0.59,
"grad_norm": 0.8591299057006836,
"learning_rate": 3.869969207501138e-06,
"loss": 0.616,
"step": 9240
},
{
"epoch": 0.59,
"grad_norm": 0.9023558497428894,
"learning_rate": 3.868969784308101e-06,
"loss": 0.6094,
"step": 9241
},
{
"epoch": 0.59,
"grad_norm": 0.8794646859169006,
"learning_rate": 3.867970408738814e-06,
"loss": 0.566,
"step": 9242
},
{
"epoch": 0.59,
"grad_norm": 0.8649892210960388,
"learning_rate": 3.866971080835352e-06,
"loss": 0.5934,
"step": 9243
},
{
"epoch": 0.59,
"grad_norm": 0.9679709672927856,
"learning_rate": 3.8659718006398e-06,
"loss": 0.6249,
"step": 9244
},
{
"epoch": 0.59,
"grad_norm": 0.8926246166229248,
"learning_rate": 3.864972568194227e-06,
"loss": 0.5787,
"step": 9245
},
{
"epoch": 0.59,
"grad_norm": 0.8640733957290649,
"learning_rate": 3.863973383540714e-06,
"loss": 0.6095,
"step": 9246
},
{
"epoch": 0.59,
"grad_norm": 0.8310282230377197,
"learning_rate": 3.8629742467213266e-06,
"loss": 0.5379,
"step": 9247
},
{
"epoch": 0.59,
"grad_norm": 0.8928051590919495,
"learning_rate": 3.86197515777814e-06,
"loss": 0.538,
"step": 9248
},
{
"epoch": 0.59,
"grad_norm": 0.9271872639656067,
"learning_rate": 3.860976116753221e-06,
"loss": 0.5781,
"step": 9249
},
{
"epoch": 0.59,
"grad_norm": 0.8660386204719543,
"learning_rate": 3.859977123688636e-06,
"loss": 0.6074,
"step": 9250
},
{
"epoch": 0.59,
"grad_norm": 0.8669256567955017,
"learning_rate": 3.858978178626446e-06,
"loss": 0.59,
"step": 9251
},
{
"epoch": 0.59,
"grad_norm": 0.8511551022529602,
"learning_rate": 3.8579792816087175e-06,
"loss": 0.5496,
"step": 9252
},
{
"epoch": 0.59,
"grad_norm": 0.8670158386230469,
"learning_rate": 3.856980432677508e-06,
"loss": 0.5846,
"step": 9253
},
{
"epoch": 0.59,
"grad_norm": 0.9106800556182861,
"learning_rate": 3.855981631874877e-06,
"loss": 0.6221,
"step": 9254
},
{
"epoch": 0.59,
"grad_norm": 0.9327592253684998,
"learning_rate": 3.85498287924288e-06,
"loss": 0.6351,
"step": 9255
},
{
"epoch": 0.59,
"grad_norm": 0.8513845801353455,
"learning_rate": 3.853984174823568e-06,
"loss": 0.5642,
"step": 9256
},
{
"epoch": 0.59,
"grad_norm": 0.9598379135131836,
"learning_rate": 3.852985518658997e-06,
"loss": 0.5821,
"step": 9257
},
{
"epoch": 0.59,
"grad_norm": 0.9493588209152222,
"learning_rate": 3.851986910791217e-06,
"loss": 0.575,
"step": 9258
},
{
"epoch": 0.59,
"grad_norm": 0.8645276427268982,
"learning_rate": 3.850988351262274e-06,
"loss": 0.5513,
"step": 9259
},
{
"epoch": 0.59,
"grad_norm": 0.8836858868598938,
"learning_rate": 3.849989840114213e-06,
"loss": 0.6298,
"step": 9260
},
{
"epoch": 0.59,
"grad_norm": 0.8826265931129456,
"learning_rate": 3.84899137738908e-06,
"loss": 0.5903,
"step": 9261
},
{
"epoch": 0.59,
"grad_norm": 0.8552426695823669,
"learning_rate": 3.847992963128917e-06,
"loss": 0.578,
"step": 9262
},
{
"epoch": 0.59,
"grad_norm": 0.8744699954986572,
"learning_rate": 3.846994597375763e-06,
"loss": 0.5525,
"step": 9263
},
{
"epoch": 0.59,
"grad_norm": 0.9690203070640564,
"learning_rate": 3.845996280171653e-06,
"loss": 0.633,
"step": 9264
},
{
"epoch": 0.59,
"grad_norm": 0.9128517508506775,
"learning_rate": 3.844998011558626e-06,
"loss": 0.6275,
"step": 9265
},
{
"epoch": 0.59,
"grad_norm": 0.8809550404548645,
"learning_rate": 3.843999791578716e-06,
"loss": 0.5585,
"step": 9266
},
{
"epoch": 0.59,
"grad_norm": 0.8937491178512573,
"learning_rate": 3.843001620273954e-06,
"loss": 0.5942,
"step": 9267
},
{
"epoch": 0.59,
"grad_norm": 0.8887850046157837,
"learning_rate": 3.842003497686367e-06,
"loss": 0.5823,
"step": 9268
},
{
"epoch": 0.59,
"grad_norm": 0.8240920305252075,
"learning_rate": 3.841005423857984e-06,
"loss": 0.561,
"step": 9269
},
{
"epoch": 0.59,
"grad_norm": 0.889115035533905,
"learning_rate": 3.840007398830833e-06,
"loss": 0.5673,
"step": 9270
},
{
"epoch": 0.59,
"grad_norm": 0.9033503532409668,
"learning_rate": 3.839009422646935e-06,
"loss": 0.6556,
"step": 9271
},
{
"epoch": 0.59,
"grad_norm": 0.8702270984649658,
"learning_rate": 3.8380114953483095e-06,
"loss": 0.6081,
"step": 9272
},
{
"epoch": 0.59,
"grad_norm": 0.8264375329017639,
"learning_rate": 3.837013616976977e-06,
"loss": 0.5227,
"step": 9273
},
{
"epoch": 0.59,
"grad_norm": 0.9013060927391052,
"learning_rate": 3.8360157875749575e-06,
"loss": 0.6422,
"step": 9274
},
{
"epoch": 0.59,
"grad_norm": 0.8687025904655457,
"learning_rate": 3.835018007184265e-06,
"loss": 0.6144,
"step": 9275
},
{
"epoch": 0.59,
"grad_norm": 0.8397945761680603,
"learning_rate": 3.834020275846909e-06,
"loss": 0.5611,
"step": 9276
},
{
"epoch": 0.59,
"grad_norm": 0.8652381896972656,
"learning_rate": 3.833022593604902e-06,
"loss": 0.5984,
"step": 9277
},
{
"epoch": 0.59,
"grad_norm": 0.8577977418899536,
"learning_rate": 3.832024960500257e-06,
"loss": 0.5211,
"step": 9278
},
{
"epoch": 0.59,
"grad_norm": 0.9175687432289124,
"learning_rate": 3.8310273765749774e-06,
"loss": 0.6194,
"step": 9279
},
{
"epoch": 0.59,
"grad_norm": 0.8669849038124084,
"learning_rate": 3.830029841871067e-06,
"loss": 0.5919,
"step": 9280
},
{
"epoch": 0.59,
"grad_norm": 0.9313320517539978,
"learning_rate": 3.82903235643053e-06,
"loss": 0.6049,
"step": 9281
},
{
"epoch": 0.59,
"grad_norm": 0.8776915669441223,
"learning_rate": 3.828034920295368e-06,
"loss": 0.5875,
"step": 9282
},
{
"epoch": 0.59,
"grad_norm": 0.9610856175422668,
"learning_rate": 3.827037533507579e-06,
"loss": 0.5978,
"step": 9283
},
{
"epoch": 0.59,
"grad_norm": 0.8849360942840576,
"learning_rate": 3.826040196109158e-06,
"loss": 0.5807,
"step": 9284
},
{
"epoch": 0.59,
"grad_norm": 0.9191281795501709,
"learning_rate": 3.825042908142102e-06,
"loss": 0.5998,
"step": 9285
},
{
"epoch": 0.59,
"grad_norm": 0.8342413306236267,
"learning_rate": 3.824045669648398e-06,
"loss": 0.5753,
"step": 9286
},
{
"epoch": 0.59,
"grad_norm": 0.8650674819946289,
"learning_rate": 3.823048480670044e-06,
"loss": 0.5979,
"step": 9287
},
{
"epoch": 0.59,
"grad_norm": 0.8660332560539246,
"learning_rate": 3.8220513412490215e-06,
"loss": 0.5876,
"step": 9288
},
{
"epoch": 0.59,
"grad_norm": 0.9197229743003845,
"learning_rate": 3.821054251427321e-06,
"loss": 0.622,
"step": 9289
},
{
"epoch": 0.59,
"grad_norm": 0.8805333375930786,
"learning_rate": 3.820057211246923e-06,
"loss": 0.5349,
"step": 9290
},
{
"epoch": 0.59,
"grad_norm": 1.0064138174057007,
"learning_rate": 3.819060220749813e-06,
"loss": 0.6236,
"step": 9291
},
{
"epoch": 0.59,
"grad_norm": 0.9283258318901062,
"learning_rate": 3.8180632799779675e-06,
"loss": 0.56,
"step": 9292
},
{
"epoch": 0.59,
"grad_norm": 0.9393151998519897,
"learning_rate": 3.817066388973367e-06,
"loss": 0.5407,
"step": 9293
},
{
"epoch": 0.59,
"grad_norm": 0.87945157289505,
"learning_rate": 3.816069547777983e-06,
"loss": 0.6007,
"step": 9294
},
{
"epoch": 0.59,
"grad_norm": 0.9038872718811035,
"learning_rate": 3.815072756433794e-06,
"loss": 0.6051,
"step": 9295
},
{
"epoch": 0.59,
"grad_norm": 0.8580070734024048,
"learning_rate": 3.814076014982769e-06,
"loss": 0.5844,
"step": 9296
},
{
"epoch": 0.59,
"grad_norm": 0.9397634863853455,
"learning_rate": 3.8130793234668782e-06,
"loss": 0.6006,
"step": 9297
},
{
"epoch": 0.59,
"grad_norm": 0.9336340427398682,
"learning_rate": 3.812082681928086e-06,
"loss": 0.6343,
"step": 9298
},
{
"epoch": 0.59,
"grad_norm": 0.8567546606063843,
"learning_rate": 3.81108609040836e-06,
"loss": 0.5669,
"step": 9299
},
{
"epoch": 0.59,
"grad_norm": 0.9026763439178467,
"learning_rate": 3.810089548949665e-06,
"loss": 0.6,
"step": 9300
},
{
"epoch": 0.59,
"grad_norm": 0.8796485066413879,
"learning_rate": 3.8090930575939588e-06,
"loss": 0.56,
"step": 9301
},
{
"epoch": 0.59,
"grad_norm": 0.9571773409843445,
"learning_rate": 3.8080966163832e-06,
"loss": 0.6081,
"step": 9302
},
{
"epoch": 0.59,
"grad_norm": 0.8654407262802124,
"learning_rate": 3.807100225359346e-06,
"loss": 0.5752,
"step": 9303
},
{
"epoch": 0.59,
"grad_norm": 0.942138135433197,
"learning_rate": 3.8061038845643535e-06,
"loss": 0.6286,
"step": 9304
},
{
"epoch": 0.59,
"grad_norm": 0.9119827747344971,
"learning_rate": 3.8051075940401727e-06,
"loss": 0.5855,
"step": 9305
},
{
"epoch": 0.59,
"grad_norm": 0.908536970615387,
"learning_rate": 3.8041113538287537e-06,
"loss": 0.6099,
"step": 9306
},
{
"epoch": 0.59,
"grad_norm": 0.9727365970611572,
"learning_rate": 3.803115163972044e-06,
"loss": 0.5992,
"step": 9307
},
{
"epoch": 0.59,
"grad_norm": 0.9480968713760376,
"learning_rate": 3.8021190245119937e-06,
"loss": 0.6685,
"step": 9308
},
{
"epoch": 0.59,
"grad_norm": 0.9626975655555725,
"learning_rate": 3.8011229354905445e-06,
"loss": 0.633,
"step": 9309
},
{
"epoch": 0.59,
"grad_norm": 0.9095605611801147,
"learning_rate": 3.8001268969496357e-06,
"loss": 0.5897,
"step": 9310
},
{
"epoch": 0.59,
"grad_norm": 0.9230700135231018,
"learning_rate": 3.799130908931209e-06,
"loss": 0.6167,
"step": 9311
},
{
"epoch": 0.59,
"grad_norm": 0.8941061496734619,
"learning_rate": 3.7981349714772044e-06,
"loss": 0.5678,
"step": 9312
},
{
"epoch": 0.59,
"grad_norm": 0.9019367098808289,
"learning_rate": 3.7971390846295546e-06,
"loss": 0.6376,
"step": 9313
},
{
"epoch": 0.59,
"grad_norm": 0.9550539255142212,
"learning_rate": 3.7961432484301925e-06,
"loss": 0.6213,
"step": 9314
},
{
"epoch": 0.59,
"grad_norm": 0.8824061155319214,
"learning_rate": 3.7951474629210517e-06,
"loss": 0.5651,
"step": 9315
},
{
"epoch": 0.59,
"grad_norm": 0.9145764708518982,
"learning_rate": 3.7941517281440577e-06,
"loss": 0.5492,
"step": 9316
},
{
"epoch": 0.59,
"grad_norm": 0.8980282545089722,
"learning_rate": 3.7931560441411413e-06,
"loss": 0.5882,
"step": 9317
},
{
"epoch": 0.59,
"grad_norm": 0.8780221343040466,
"learning_rate": 3.792160410954225e-06,
"loss": 0.5892,
"step": 9318
},
{
"epoch": 0.59,
"grad_norm": 0.8798972368240356,
"learning_rate": 3.791164828625233e-06,
"loss": 0.5706,
"step": 9319
},
{
"epoch": 0.59,
"grad_norm": 0.8012358546257019,
"learning_rate": 3.7901692971960823e-06,
"loss": 0.5133,
"step": 9320
},
{
"epoch": 0.59,
"grad_norm": 0.9223332405090332,
"learning_rate": 3.7891738167086968e-06,
"loss": 0.5617,
"step": 9321
},
{
"epoch": 0.59,
"grad_norm": 0.8617243766784668,
"learning_rate": 3.7881783872049875e-06,
"loss": 0.6052,
"step": 9322
},
{
"epoch": 0.59,
"grad_norm": 0.8890591859817505,
"learning_rate": 3.7871830087268726e-06,
"loss": 0.5505,
"step": 9323
},
{
"epoch": 0.59,
"grad_norm": 0.9087486267089844,
"learning_rate": 3.7861876813162596e-06,
"loss": 0.5906,
"step": 9324
},
{
"epoch": 0.59,
"grad_norm": 0.9168681502342224,
"learning_rate": 3.7851924050150633e-06,
"loss": 0.6217,
"step": 9325
},
{
"epoch": 0.59,
"grad_norm": 0.8641383051872253,
"learning_rate": 3.7841971798651876e-06,
"loss": 0.5495,
"step": 9326
},
{
"epoch": 0.59,
"grad_norm": 0.9064701795578003,
"learning_rate": 3.78320200590854e-06,
"loss": 0.6339,
"step": 9327
},
{
"epoch": 0.59,
"grad_norm": 0.844735324382782,
"learning_rate": 3.782206883187021e-06,
"loss": 0.6285,
"step": 9328
},
{
"epoch": 0.59,
"grad_norm": 0.953070878982544,
"learning_rate": 3.7812118117425363e-06,
"loss": 0.5556,
"step": 9329
},
{
"epoch": 0.59,
"grad_norm": 0.8404299020767212,
"learning_rate": 3.7802167916169808e-06,
"loss": 0.6154,
"step": 9330
},
{
"epoch": 0.59,
"grad_norm": 0.8700167536735535,
"learning_rate": 3.7792218228522536e-06,
"loss": 0.6355,
"step": 9331
},
{
"epoch": 0.59,
"grad_norm": 0.8953800797462463,
"learning_rate": 3.7782269054902493e-06,
"loss": 0.5983,
"step": 9332
},
{
"epoch": 0.59,
"grad_norm": 0.8571730256080627,
"learning_rate": 3.777232039572858e-06,
"loss": 0.5895,
"step": 9333
},
{
"epoch": 0.59,
"grad_norm": 0.9391055107116699,
"learning_rate": 3.7762372251419722e-06,
"loss": 0.64,
"step": 9334
},
{
"epoch": 0.59,
"grad_norm": 0.8176417350769043,
"learning_rate": 3.7752424622394807e-06,
"loss": 0.5433,
"step": 9335
},
{
"epoch": 0.59,
"grad_norm": 0.8655744194984436,
"learning_rate": 3.7742477509072684e-06,
"loss": 0.5325,
"step": 9336
},
{
"epoch": 0.59,
"grad_norm": 0.9132146835327148,
"learning_rate": 3.7732530911872177e-06,
"loss": 0.5687,
"step": 9337
},
{
"epoch": 0.59,
"grad_norm": 0.9163744449615479,
"learning_rate": 3.7722584831212127e-06,
"loss": 0.5955,
"step": 9338
},
{
"epoch": 0.59,
"grad_norm": 0.9320352673530579,
"learning_rate": 3.771263926751133e-06,
"loss": 0.5999,
"step": 9339
},
{
"epoch": 0.59,
"grad_norm": 0.8946950435638428,
"learning_rate": 3.7702694221188548e-06,
"loss": 0.599,
"step": 9340
},
{
"epoch": 0.59,
"grad_norm": 0.8757476210594177,
"learning_rate": 3.769274969266251e-06,
"loss": 0.5752,
"step": 9341
},
{
"epoch": 0.59,
"grad_norm": 0.9394705295562744,
"learning_rate": 3.768280568235198e-06,
"loss": 0.607,
"step": 9342
},
{
"epoch": 0.59,
"grad_norm": 0.8652751445770264,
"learning_rate": 3.767286219067566e-06,
"loss": 0.5999,
"step": 9343
},
{
"epoch": 0.59,
"grad_norm": 0.9048978686332703,
"learning_rate": 3.766291921805224e-06,
"loss": 0.5745,
"step": 9344
},
{
"epoch": 0.59,
"grad_norm": 0.8632850050926208,
"learning_rate": 3.765297676490035e-06,
"loss": 0.5459,
"step": 9345
},
{
"epoch": 0.59,
"grad_norm": 0.8732088804244995,
"learning_rate": 3.764303483163867e-06,
"loss": 0.6096,
"step": 9346
},
{
"epoch": 0.59,
"grad_norm": 0.914079487323761,
"learning_rate": 3.7633093418685806e-06,
"loss": 0.5492,
"step": 9347
},
{
"epoch": 0.59,
"grad_norm": 0.8865966200828552,
"learning_rate": 3.7623152526460365e-06,
"loss": 0.5839,
"step": 9348
},
{
"epoch": 0.59,
"grad_norm": 0.9960510730743408,
"learning_rate": 3.7613212155380907e-06,
"loss": 0.6053,
"step": 9349
},
{
"epoch": 0.59,
"grad_norm": 0.8940538763999939,
"learning_rate": 3.760327230586598e-06,
"loss": 0.56,
"step": 9350
},
{
"epoch": 0.59,
"grad_norm": 0.9300123453140259,
"learning_rate": 3.7593332978334153e-06,
"loss": 0.5792,
"step": 9351
},
{
"epoch": 0.59,
"grad_norm": 0.8757148385047913,
"learning_rate": 3.7583394173203913e-06,
"loss": 0.5871,
"step": 9352
},
{
"epoch": 0.59,
"grad_norm": 0.9180594682693481,
"learning_rate": 3.757345589089374e-06,
"loss": 0.5897,
"step": 9353
},
{
"epoch": 0.59,
"grad_norm": 0.9074809551239014,
"learning_rate": 3.75635181318221e-06,
"loss": 0.6202,
"step": 9354
},
{
"epoch": 0.59,
"grad_norm": 0.9342314600944519,
"learning_rate": 3.755358089640747e-06,
"loss": 0.6069,
"step": 9355
},
{
"epoch": 0.59,
"grad_norm": 0.9455806016921997,
"learning_rate": 3.754364418506825e-06,
"loss": 0.5676,
"step": 9356
},
{
"epoch": 0.59,
"grad_norm": 0.8812072277069092,
"learning_rate": 3.7533707998222835e-06,
"loss": 0.6217,
"step": 9357
},
{
"epoch": 0.59,
"grad_norm": 0.8865488767623901,
"learning_rate": 3.7523772336289594e-06,
"loss": 0.5901,
"step": 9358
},
{
"epoch": 0.59,
"grad_norm": 0.8491330146789551,
"learning_rate": 3.751383719968692e-06,
"loss": 0.5475,
"step": 9359
},
{
"epoch": 0.59,
"grad_norm": 0.8442015647888184,
"learning_rate": 3.7503902588833124e-06,
"loss": 0.5699,
"step": 9360
},
{
"epoch": 0.59,
"grad_norm": 0.8833118081092834,
"learning_rate": 3.7493968504146513e-06,
"loss": 0.5281,
"step": 9361
},
{
"epoch": 0.59,
"grad_norm": 0.9113878011703491,
"learning_rate": 3.748403494604539e-06,
"loss": 0.5694,
"step": 9362
},
{
"epoch": 0.59,
"grad_norm": 0.8851996064186096,
"learning_rate": 3.747410191494799e-06,
"loss": 0.6256,
"step": 9363
},
{
"epoch": 0.59,
"grad_norm": 0.9763572216033936,
"learning_rate": 3.74641694112726e-06,
"loss": 0.5549,
"step": 9364
},
{
"epoch": 0.59,
"grad_norm": 0.900982141494751,
"learning_rate": 3.745423743543744e-06,
"loss": 0.5617,
"step": 9365
},
{
"epoch": 0.59,
"grad_norm": 0.9159935116767883,
"learning_rate": 3.7444305987860698e-06,
"loss": 0.6144,
"step": 9366
},
{
"epoch": 0.59,
"grad_norm": 0.949503481388092,
"learning_rate": 3.7434375068960528e-06,
"loss": 0.583,
"step": 9367
},
{
"epoch": 0.59,
"grad_norm": 0.8828021287918091,
"learning_rate": 3.7424444679155126e-06,
"loss": 0.6385,
"step": 9368
},
{
"epoch": 0.59,
"grad_norm": 0.8816289901733398,
"learning_rate": 3.7414514818862613e-06,
"loss": 0.6269,
"step": 9369
},
{
"epoch": 0.59,
"grad_norm": 0.9142639636993408,
"learning_rate": 3.7404585488501106e-06,
"loss": 0.6134,
"step": 9370
},
{
"epoch": 0.59,
"grad_norm": 0.8727523684501648,
"learning_rate": 3.7394656688488663e-06,
"loss": 0.5525,
"step": 9371
},
{
"epoch": 0.59,
"grad_norm": 0.9459042549133301,
"learning_rate": 3.7384728419243386e-06,
"loss": 0.5931,
"step": 9372
},
{
"epoch": 0.59,
"grad_norm": 0.916693389415741,
"learning_rate": 3.7374800681183334e-06,
"loss": 0.5991,
"step": 9373
},
{
"epoch": 0.59,
"grad_norm": 0.9326224327087402,
"learning_rate": 3.736487347472649e-06,
"loss": 0.6123,
"step": 9374
},
{
"epoch": 0.59,
"grad_norm": 0.962369978427887,
"learning_rate": 3.735494680029086e-06,
"loss": 0.6034,
"step": 9375
},
{
"epoch": 0.59,
"grad_norm": 0.8627373576164246,
"learning_rate": 3.734502065829443e-06,
"loss": 0.5739,
"step": 9376
},
{
"epoch": 0.59,
"grad_norm": 0.9000149369239807,
"learning_rate": 3.7335095049155173e-06,
"loss": 0.5426,
"step": 9377
},
{
"epoch": 0.59,
"grad_norm": 0.9197530150413513,
"learning_rate": 3.732516997329101e-06,
"loss": 0.6075,
"step": 9378
},
{
"epoch": 0.59,
"grad_norm": 0.8494321703910828,
"learning_rate": 3.731524543111983e-06,
"loss": 0.6488,
"step": 9379
},
{
"epoch": 0.59,
"grad_norm": 0.9430056810379028,
"learning_rate": 3.7305321423059526e-06,
"loss": 0.5836,
"step": 9380
},
{
"epoch": 0.59,
"grad_norm": 0.8478774428367615,
"learning_rate": 3.7295397949528e-06,
"loss": 0.565,
"step": 9381
},
{
"epoch": 0.59,
"grad_norm": 0.9042559266090393,
"learning_rate": 3.7285475010943067e-06,
"loss": 0.5992,
"step": 9382
},
{
"epoch": 0.59,
"grad_norm": 0.8787109851837158,
"learning_rate": 3.7275552607722544e-06,
"loss": 0.5903,
"step": 9383
},
{
"epoch": 0.59,
"grad_norm": 0.9351126551628113,
"learning_rate": 3.726563074028422e-06,
"loss": 0.5989,
"step": 9384
},
{
"epoch": 0.59,
"grad_norm": 0.8641285300254822,
"learning_rate": 3.7255709409045914e-06,
"loss": 0.5639,
"step": 9385
},
{
"epoch": 0.59,
"grad_norm": 0.8513421416282654,
"learning_rate": 3.724578861442535e-06,
"loss": 0.5561,
"step": 9386
},
{
"epoch": 0.59,
"grad_norm": 0.9056752324104309,
"learning_rate": 3.7235868356840244e-06,
"loss": 0.6007,
"step": 9387
},
{
"epoch": 0.59,
"grad_norm": 0.8228696584701538,
"learning_rate": 3.722594863670831e-06,
"loss": 0.5458,
"step": 9388
},
{
"epoch": 0.59,
"grad_norm": 0.8777575492858887,
"learning_rate": 3.7216029454447262e-06,
"loss": 0.5547,
"step": 9389
},
{
"epoch": 0.59,
"grad_norm": 0.8967665433883667,
"learning_rate": 3.720611081047474e-06,
"loss": 0.5842,
"step": 9390
},
{
"epoch": 0.59,
"grad_norm": 0.8031774759292603,
"learning_rate": 3.7196192705208378e-06,
"loss": 0.5461,
"step": 9391
},
{
"epoch": 0.6,
"grad_norm": 0.9115647077560425,
"learning_rate": 3.7186275139065807e-06,
"loss": 0.5559,
"step": 9392
},
{
"epoch": 0.6,
"grad_norm": 0.9450206756591797,
"learning_rate": 3.7176358112464593e-06,
"loss": 0.6226,
"step": 9393
},
{
"epoch": 0.6,
"grad_norm": 0.866363525390625,
"learning_rate": 3.716644162582235e-06,
"loss": 0.5949,
"step": 9394
},
{
"epoch": 0.6,
"grad_norm": 0.8881354331970215,
"learning_rate": 3.7156525679556597e-06,
"loss": 0.6358,
"step": 9395
},
{
"epoch": 0.6,
"grad_norm": 0.8575799465179443,
"learning_rate": 3.7146610274084875e-06,
"loss": 0.6055,
"step": 9396
},
{
"epoch": 0.6,
"grad_norm": 0.9607718586921692,
"learning_rate": 3.7136695409824665e-06,
"loss": 0.5996,
"step": 9397
},
{
"epoch": 0.6,
"grad_norm": 0.922845184803009,
"learning_rate": 3.712678108719348e-06,
"loss": 0.6158,
"step": 9398
},
{
"epoch": 0.6,
"grad_norm": 0.9459704756736755,
"learning_rate": 3.711686730660875e-06,
"loss": 0.5811,
"step": 9399
},
{
"epoch": 0.6,
"grad_norm": 0.8620908856391907,
"learning_rate": 3.710695406848794e-06,
"loss": 0.6261,
"step": 9400
},
{
"epoch": 0.6,
"grad_norm": 0.9615730047225952,
"learning_rate": 3.709704137324841e-06,
"loss": 0.6142,
"step": 9401
},
{
"epoch": 0.6,
"grad_norm": 0.8960047960281372,
"learning_rate": 3.7087129221307605e-06,
"loss": 0.5535,
"step": 9402
},
{
"epoch": 0.6,
"grad_norm": 0.9143571257591248,
"learning_rate": 3.7077217613082863e-06,
"loss": 0.5854,
"step": 9403
},
{
"epoch": 0.6,
"grad_norm": 0.9054227471351624,
"learning_rate": 3.7067306548991543e-06,
"loss": 0.6092,
"step": 9404
},
{
"epoch": 0.6,
"grad_norm": 0.9034886956214905,
"learning_rate": 3.7057396029450925e-06,
"loss": 0.6547,
"step": 9405
},
{
"epoch": 0.6,
"grad_norm": 0.862893283367157,
"learning_rate": 3.7047486054878367e-06,
"loss": 0.6051,
"step": 9406
},
{
"epoch": 0.6,
"grad_norm": 0.8760849237442017,
"learning_rate": 3.7037576625691095e-06,
"loss": 0.5925,
"step": 9407
},
{
"epoch": 0.6,
"grad_norm": 0.8564549684524536,
"learning_rate": 3.7027667742306393e-06,
"loss": 0.5745,
"step": 9408
},
{
"epoch": 0.6,
"grad_norm": 0.8729873895645142,
"learning_rate": 3.7017759405141476e-06,
"loss": 0.5559,
"step": 9409
},
{
"epoch": 0.6,
"grad_norm": 0.8863754868507385,
"learning_rate": 3.7007851614613522e-06,
"loss": 0.6388,
"step": 9410
},
{
"epoch": 0.6,
"grad_norm": 0.9098048210144043,
"learning_rate": 3.699794437113975e-06,
"loss": 0.6317,
"step": 9411
},
{
"epoch": 0.6,
"grad_norm": 0.8687307834625244,
"learning_rate": 3.698803767513732e-06,
"loss": 0.5728,
"step": 9412
},
{
"epoch": 0.6,
"grad_norm": 0.9310614466667175,
"learning_rate": 3.6978131527023363e-06,
"loss": 0.5108,
"step": 9413
},
{
"epoch": 0.6,
"grad_norm": 0.8796650767326355,
"learning_rate": 3.696822592721497e-06,
"loss": 0.6081,
"step": 9414
},
{
"epoch": 0.6,
"grad_norm": 0.8547250628471375,
"learning_rate": 3.695832087612925e-06,
"loss": 0.5948,
"step": 9415
},
{
"epoch": 0.6,
"grad_norm": 0.8584107756614685,
"learning_rate": 3.6948416374183287e-06,
"loss": 0.6096,
"step": 9416
},
{
"epoch": 0.6,
"grad_norm": 0.8845054507255554,
"learning_rate": 3.6938512421794103e-06,
"loss": 0.6239,
"step": 9417
},
{
"epoch": 0.6,
"grad_norm": 0.8871665596961975,
"learning_rate": 3.6928609019378702e-06,
"loss": 0.6064,
"step": 9418
},
{
"epoch": 0.6,
"grad_norm": 0.9024144411087036,
"learning_rate": 3.6918706167354125e-06,
"loss": 0.5947,
"step": 9419
},
{
"epoch": 0.6,
"grad_norm": 0.8818714618682861,
"learning_rate": 3.690880386613732e-06,
"loss": 0.5635,
"step": 9420
},
{
"epoch": 0.6,
"grad_norm": 0.8889881372451782,
"learning_rate": 3.689890211614525e-06,
"loss": 0.5769,
"step": 9421
},
{
"epoch": 0.6,
"grad_norm": 0.8806608319282532,
"learning_rate": 3.6889000917794816e-06,
"loss": 0.5825,
"step": 9422
},
{
"epoch": 0.6,
"grad_norm": 0.9338346719741821,
"learning_rate": 3.6879100271502953e-06,
"loss": 0.6212,
"step": 9423
},
{
"epoch": 0.6,
"grad_norm": 0.8248009085655212,
"learning_rate": 3.6869200177686543e-06,
"loss": 0.5429,
"step": 9424
},
{
"epoch": 0.6,
"grad_norm": 0.8343362212181091,
"learning_rate": 3.6859300636762423e-06,
"loss": 0.5304,
"step": 9425
},
{
"epoch": 0.6,
"grad_norm": 0.9038220643997192,
"learning_rate": 3.6849401649147453e-06,
"loss": 0.6251,
"step": 9426
},
{
"epoch": 0.6,
"grad_norm": 0.929233968257904,
"learning_rate": 3.683950321525841e-06,
"loss": 0.5699,
"step": 9427
},
{
"epoch": 0.6,
"grad_norm": 0.8593815565109253,
"learning_rate": 3.682960533551213e-06,
"loss": 0.5558,
"step": 9428
},
{
"epoch": 0.6,
"grad_norm": 0.8797299861907959,
"learning_rate": 3.681970801032534e-06,
"loss": 0.5799,
"step": 9429
},
{
"epoch": 0.6,
"grad_norm": 0.8691550493240356,
"learning_rate": 3.68098112401148e-06,
"loss": 0.5446,
"step": 9430
},
{
"epoch": 0.6,
"grad_norm": 0.8693859577178955,
"learning_rate": 3.6799915025297206e-06,
"loss": 0.5527,
"step": 9431
},
{
"epoch": 0.6,
"grad_norm": 0.8916878700256348,
"learning_rate": 3.6790019366289293e-06,
"loss": 0.566,
"step": 9432
},
{
"epoch": 0.6,
"grad_norm": 0.907010018825531,
"learning_rate": 3.67801242635077e-06,
"loss": 0.6145,
"step": 9433
},
{
"epoch": 0.6,
"grad_norm": 0.8580716848373413,
"learning_rate": 3.6770229717369086e-06,
"loss": 0.5711,
"step": 9434
},
{
"epoch": 0.6,
"grad_norm": 0.8871995806694031,
"learning_rate": 3.6760335728290062e-06,
"loss": 0.6209,
"step": 9435
},
{
"epoch": 0.6,
"grad_norm": 0.8500985503196716,
"learning_rate": 3.6750442296687272e-06,
"loss": 0.584,
"step": 9436
},
{
"epoch": 0.6,
"grad_norm": 0.871005654335022,
"learning_rate": 3.6740549422977244e-06,
"loss": 0.5876,
"step": 9437
},
{
"epoch": 0.6,
"grad_norm": 0.929032564163208,
"learning_rate": 3.6730657107576574e-06,
"loss": 0.6177,
"step": 9438
},
{
"epoch": 0.6,
"grad_norm": 0.9027160406112671,
"learning_rate": 3.6720765350901765e-06,
"loss": 0.5653,
"step": 9439
},
{
"epoch": 0.6,
"grad_norm": 0.939904510974884,
"learning_rate": 3.671087415336931e-06,
"loss": 0.6247,
"step": 9440
},
{
"epoch": 0.6,
"grad_norm": 0.8883887529373169,
"learning_rate": 3.6700983515395726e-06,
"loss": 0.5748,
"step": 9441
},
{
"epoch": 0.6,
"grad_norm": 0.9077379703521729,
"learning_rate": 3.669109343739747e-06,
"loss": 0.5666,
"step": 9442
},
{
"epoch": 0.6,
"grad_norm": 0.91063392162323,
"learning_rate": 3.668120391979098e-06,
"loss": 0.5704,
"step": 9443
},
{
"epoch": 0.6,
"grad_norm": 0.8031530380249023,
"learning_rate": 3.6671314962992634e-06,
"loss": 0.5456,
"step": 9444
},
{
"epoch": 0.6,
"grad_norm": 0.8312580585479736,
"learning_rate": 3.666142656741886e-06,
"loss": 0.5977,
"step": 9445
},
{
"epoch": 0.6,
"grad_norm": 0.8705939054489136,
"learning_rate": 3.6651538733486027e-06,
"loss": 0.5965,
"step": 9446
},
{
"epoch": 0.6,
"grad_norm": 0.9241865873336792,
"learning_rate": 3.664165146161045e-06,
"loss": 0.5973,
"step": 9447
},
{
"epoch": 0.6,
"grad_norm": 0.8538462519645691,
"learning_rate": 3.663176475220844e-06,
"loss": 0.5936,
"step": 9448
},
{
"epoch": 0.6,
"grad_norm": 0.8608808517456055,
"learning_rate": 3.6621878605696338e-06,
"loss": 0.5336,
"step": 9449
},
{
"epoch": 0.6,
"grad_norm": 0.9277994632720947,
"learning_rate": 3.6611993022490383e-06,
"loss": 0.5753,
"step": 9450
},
{
"epoch": 0.6,
"grad_norm": 0.8902243375778198,
"learning_rate": 3.660210800300683e-06,
"loss": 0.5836,
"step": 9451
},
{
"epoch": 0.6,
"grad_norm": 0.9182867407798767,
"learning_rate": 3.6592223547661888e-06,
"loss": 0.616,
"step": 9452
},
{
"epoch": 0.6,
"grad_norm": 0.8831941485404968,
"learning_rate": 3.6582339656871778e-06,
"loss": 0.5666,
"step": 9453
},
{
"epoch": 0.6,
"grad_norm": 0.9414946436882019,
"learning_rate": 3.6572456331052673e-06,
"loss": 0.5904,
"step": 9454
},
{
"epoch": 0.6,
"grad_norm": 0.8741511106491089,
"learning_rate": 3.656257357062073e-06,
"loss": 0.6501,
"step": 9455
},
{
"epoch": 0.6,
"grad_norm": 0.882854700088501,
"learning_rate": 3.6552691375992056e-06,
"loss": 0.661,
"step": 9456
},
{
"epoch": 0.6,
"grad_norm": 0.8658345937728882,
"learning_rate": 3.6542809747582755e-06,
"loss": 0.593,
"step": 9457
},
{
"epoch": 0.6,
"grad_norm": 0.856181800365448,
"learning_rate": 3.6532928685808937e-06,
"loss": 0.6158,
"step": 9458
},
{
"epoch": 0.6,
"grad_norm": 0.9174668788909912,
"learning_rate": 3.6523048191086654e-06,
"loss": 0.5738,
"step": 9459
},
{
"epoch": 0.6,
"grad_norm": 0.9109136462211609,
"learning_rate": 3.6513168263831913e-06,
"loss": 0.5629,
"step": 9460
},
{
"epoch": 0.6,
"grad_norm": 0.9288260340690613,
"learning_rate": 3.6503288904460725e-06,
"loss": 0.5806,
"step": 9461
},
{
"epoch": 0.6,
"grad_norm": 0.912398636341095,
"learning_rate": 3.6493410113389116e-06,
"loss": 0.6132,
"step": 9462
},
{
"epoch": 0.6,
"grad_norm": 0.9341295957565308,
"learning_rate": 3.648353189103302e-06,
"loss": 0.6314,
"step": 9463
},
{
"epoch": 0.6,
"grad_norm": 0.885158121585846,
"learning_rate": 3.6473654237808365e-06,
"loss": 0.5599,
"step": 9464
},
{
"epoch": 0.6,
"grad_norm": 0.8969646096229553,
"learning_rate": 3.6463777154131065e-06,
"loss": 0.6136,
"step": 9465
},
{
"epoch": 0.6,
"grad_norm": 0.8847031593322754,
"learning_rate": 3.645390064041704e-06,
"loss": 0.5568,
"step": 9466
},
{
"epoch": 0.6,
"grad_norm": 0.857172429561615,
"learning_rate": 3.6444024697082137e-06,
"loss": 0.5342,
"step": 9467
},
{
"epoch": 0.6,
"grad_norm": 0.8913065791130066,
"learning_rate": 3.6434149324542185e-06,
"loss": 0.6032,
"step": 9468
},
{
"epoch": 0.6,
"grad_norm": 0.856587290763855,
"learning_rate": 3.6424274523213e-06,
"loss": 0.5779,
"step": 9469
},
{
"epoch": 0.6,
"grad_norm": 0.9097947478294373,
"learning_rate": 3.641440029351041e-06,
"loss": 0.5989,
"step": 9470
},
{
"epoch": 0.6,
"grad_norm": 0.9367252588272095,
"learning_rate": 3.640452663585017e-06,
"loss": 0.6136,
"step": 9471
},
{
"epoch": 0.6,
"grad_norm": 0.8729375004768372,
"learning_rate": 3.6394653550647996e-06,
"loss": 0.5497,
"step": 9472
},
{
"epoch": 0.6,
"grad_norm": 0.8242816925048828,
"learning_rate": 3.638478103831965e-06,
"loss": 0.5366,
"step": 9473
},
{
"epoch": 0.6,
"grad_norm": 0.8420335054397583,
"learning_rate": 3.6374909099280786e-06,
"loss": 0.5695,
"step": 9474
},
{
"epoch": 0.6,
"grad_norm": 0.8642702698707581,
"learning_rate": 3.636503773394713e-06,
"loss": 0.6007,
"step": 9475
},
{
"epoch": 0.6,
"grad_norm": 0.9021347761154175,
"learning_rate": 3.635516694273428e-06,
"loss": 0.6437,
"step": 9476
},
{
"epoch": 0.6,
"grad_norm": 0.9246450662612915,
"learning_rate": 3.63452967260579e-06,
"loss": 0.5961,
"step": 9477
},
{
"epoch": 0.6,
"grad_norm": 0.8637540340423584,
"learning_rate": 3.633542708433355e-06,
"loss": 0.5337,
"step": 9478
},
{
"epoch": 0.6,
"grad_norm": 0.8642069697380066,
"learning_rate": 3.632555801797686e-06,
"loss": 0.5905,
"step": 9479
},
{
"epoch": 0.6,
"grad_norm": 0.9082743525505066,
"learning_rate": 3.631568952740333e-06,
"loss": 0.5878,
"step": 9480
},
{
"epoch": 0.6,
"grad_norm": 0.9280330538749695,
"learning_rate": 3.6305821613028524e-06,
"loss": 0.5967,
"step": 9481
},
{
"epoch": 0.6,
"grad_norm": 0.8534235954284668,
"learning_rate": 3.6295954275267914e-06,
"loss": 0.5841,
"step": 9482
},
{
"epoch": 0.6,
"grad_norm": 0.8653678297996521,
"learning_rate": 3.6286087514537017e-06,
"loss": 0.5635,
"step": 9483
},
{
"epoch": 0.6,
"grad_norm": 0.8563132882118225,
"learning_rate": 3.6276221331251253e-06,
"loss": 0.5565,
"step": 9484
},
{
"epoch": 0.6,
"grad_norm": 0.7956026196479797,
"learning_rate": 3.626635572582608e-06,
"loss": 0.5475,
"step": 9485
},
{
"epoch": 0.6,
"grad_norm": 0.9152180552482605,
"learning_rate": 3.6256490698676884e-06,
"loss": 0.5893,
"step": 9486
},
{
"epoch": 0.6,
"grad_norm": 0.8446356058120728,
"learning_rate": 3.6246626250219047e-06,
"loss": 0.5365,
"step": 9487
},
{
"epoch": 0.6,
"grad_norm": 0.8718549013137817,
"learning_rate": 3.623676238086794e-06,
"loss": 0.5374,
"step": 9488
},
{
"epoch": 0.6,
"grad_norm": 0.8933292031288147,
"learning_rate": 3.6226899091038896e-06,
"loss": 0.6108,
"step": 9489
},
{
"epoch": 0.6,
"grad_norm": 0.9709043502807617,
"learning_rate": 3.6217036381147216e-06,
"loss": 0.6381,
"step": 9490
},
{
"epoch": 0.6,
"grad_norm": 0.9374916553497314,
"learning_rate": 3.620717425160818e-06,
"loss": 0.5966,
"step": 9491
},
{
"epoch": 0.6,
"grad_norm": 0.906370997428894,
"learning_rate": 3.619731270283705e-06,
"loss": 0.6183,
"step": 9492
},
{
"epoch": 0.6,
"grad_norm": 0.9059337377548218,
"learning_rate": 3.6187451735249085e-06,
"loss": 0.5477,
"step": 9493
},
{
"epoch": 0.6,
"grad_norm": 0.9128062725067139,
"learning_rate": 3.6177591349259465e-06,
"loss": 0.638,
"step": 9494
},
{
"epoch": 0.6,
"grad_norm": 0.912139892578125,
"learning_rate": 3.616773154528339e-06,
"loss": 0.6009,
"step": 9495
},
{
"epoch": 0.6,
"grad_norm": 0.8537312150001526,
"learning_rate": 3.6157872323736017e-06,
"loss": 0.5281,
"step": 9496
},
{
"epoch": 0.6,
"grad_norm": 0.9160687923431396,
"learning_rate": 3.61480136850325e-06,
"loss": 0.5757,
"step": 9497
},
{
"epoch": 0.6,
"grad_norm": 0.8970745205879211,
"learning_rate": 3.6138155629587925e-06,
"loss": 0.5668,
"step": 9498
},
{
"epoch": 0.6,
"grad_norm": 0.8694158792495728,
"learning_rate": 3.61282981578174e-06,
"loss": 0.6049,
"step": 9499
},
{
"epoch": 0.6,
"grad_norm": 0.8502684235572815,
"learning_rate": 3.611844127013598e-06,
"loss": 0.5737,
"step": 9500
},
{
"epoch": 0.6,
"grad_norm": 0.9084693193435669,
"learning_rate": 3.6108584966958717e-06,
"loss": 0.5383,
"step": 9501
},
{
"epoch": 0.6,
"grad_norm": 0.9308486580848694,
"learning_rate": 3.6098729248700604e-06,
"loss": 0.6029,
"step": 9502
},
{
"epoch": 0.6,
"grad_norm": 0.8839983344078064,
"learning_rate": 3.6088874115776664e-06,
"loss": 0.5604,
"step": 9503
},
{
"epoch": 0.6,
"grad_norm": 0.8351526856422424,
"learning_rate": 3.6079019568601816e-06,
"loss": 0.5777,
"step": 9504
},
{
"epoch": 0.6,
"grad_norm": 0.8404673933982849,
"learning_rate": 3.606916560759104e-06,
"loss": 0.5518,
"step": 9505
},
{
"epoch": 0.6,
"grad_norm": 0.8732911348342896,
"learning_rate": 3.6059312233159237e-06,
"loss": 0.5913,
"step": 9506
},
{
"epoch": 0.6,
"grad_norm": 0.8823626637458801,
"learning_rate": 3.6049459445721303e-06,
"loss": 0.567,
"step": 9507
},
{
"epoch": 0.6,
"grad_norm": 0.8690586686134338,
"learning_rate": 3.6039607245692086e-06,
"loss": 0.5204,
"step": 9508
},
{
"epoch": 0.6,
"grad_norm": 0.8530438542366028,
"learning_rate": 3.6029755633486464e-06,
"loss": 0.6059,
"step": 9509
},
{
"epoch": 0.6,
"grad_norm": 0.8257336616516113,
"learning_rate": 3.601990460951922e-06,
"loss": 0.5089,
"step": 9510
},
{
"epoch": 0.6,
"grad_norm": 0.8363312482833862,
"learning_rate": 3.6010054174205167e-06,
"loss": 0.5395,
"step": 9511
},
{
"epoch": 0.6,
"grad_norm": 0.9421717524528503,
"learning_rate": 3.6000204327959055e-06,
"loss": 0.6033,
"step": 9512
},
{
"epoch": 0.6,
"grad_norm": 0.8582902550697327,
"learning_rate": 3.599035507119565e-06,
"loss": 0.5413,
"step": 9513
},
{
"epoch": 0.6,
"grad_norm": 0.8793736696243286,
"learning_rate": 3.5980506404329647e-06,
"loss": 0.6307,
"step": 9514
},
{
"epoch": 0.6,
"grad_norm": 0.9074476361274719,
"learning_rate": 3.597065832777576e-06,
"loss": 0.6315,
"step": 9515
},
{
"epoch": 0.6,
"grad_norm": 0.8699880242347717,
"learning_rate": 3.5960810841948622e-06,
"loss": 0.6043,
"step": 9516
},
{
"epoch": 0.6,
"grad_norm": 0.8704454302787781,
"learning_rate": 3.595096394726293e-06,
"loss": 0.5378,
"step": 9517
},
{
"epoch": 0.6,
"grad_norm": 0.8879233002662659,
"learning_rate": 3.594111764413326e-06,
"loss": 0.5705,
"step": 9518
},
{
"epoch": 0.6,
"grad_norm": 0.9449933171272278,
"learning_rate": 3.5931271932974227e-06,
"loss": 0.589,
"step": 9519
},
{
"epoch": 0.6,
"grad_norm": 0.8365026116371155,
"learning_rate": 3.592142681420039e-06,
"loss": 0.5869,
"step": 9520
},
{
"epoch": 0.6,
"grad_norm": 0.8679017424583435,
"learning_rate": 3.5911582288226275e-06,
"loss": 0.5936,
"step": 9521
},
{
"epoch": 0.6,
"grad_norm": 0.9098031520843506,
"learning_rate": 3.5901738355466433e-06,
"loss": 0.6482,
"step": 9522
},
{
"epoch": 0.6,
"grad_norm": 0.8863396048545837,
"learning_rate": 3.5891895016335347e-06,
"loss": 0.5826,
"step": 9523
},
{
"epoch": 0.6,
"grad_norm": 0.8773937225341797,
"learning_rate": 3.588205227124749e-06,
"loss": 0.5994,
"step": 9524
},
{
"epoch": 0.6,
"grad_norm": 0.9152101874351501,
"learning_rate": 3.587221012061728e-06,
"loss": 0.5859,
"step": 9525
},
{
"epoch": 0.6,
"grad_norm": 0.8691193461418152,
"learning_rate": 3.586236856485916e-06,
"loss": 0.5964,
"step": 9526
},
{
"epoch": 0.6,
"grad_norm": 0.8620315194129944,
"learning_rate": 3.5852527604387533e-06,
"loss": 0.569,
"step": 9527
},
{
"epoch": 0.6,
"grad_norm": 0.864154040813446,
"learning_rate": 3.5842687239616745e-06,
"loss": 0.5576,
"step": 9528
},
{
"epoch": 0.6,
"grad_norm": 0.8283834457397461,
"learning_rate": 3.583284747096114e-06,
"loss": 0.5909,
"step": 9529
},
{
"epoch": 0.6,
"grad_norm": 0.9094521403312683,
"learning_rate": 3.5823008298835044e-06,
"loss": 0.6439,
"step": 9530
},
{
"epoch": 0.6,
"grad_norm": 0.8895583748817444,
"learning_rate": 3.5813169723652763e-06,
"loss": 0.5486,
"step": 9531
},
{
"epoch": 0.6,
"grad_norm": 0.9506862759590149,
"learning_rate": 3.5803331745828558e-06,
"loss": 0.6163,
"step": 9532
},
{
"epoch": 0.6,
"grad_norm": 0.855083703994751,
"learning_rate": 3.579349436577665e-06,
"loss": 0.6066,
"step": 9533
},
{
"epoch": 0.6,
"grad_norm": 0.9323597550392151,
"learning_rate": 3.5783657583911268e-06,
"loss": 0.6049,
"step": 9534
},
{
"epoch": 0.6,
"grad_norm": 0.865875780582428,
"learning_rate": 3.5773821400646623e-06,
"loss": 0.5621,
"step": 9535
},
{
"epoch": 0.6,
"grad_norm": 0.846933901309967,
"learning_rate": 3.5763985816396873e-06,
"loss": 0.5495,
"step": 9536
},
{
"epoch": 0.6,
"grad_norm": 0.8472744822502136,
"learning_rate": 3.575415083157615e-06,
"loss": 0.5131,
"step": 9537
},
{
"epoch": 0.6,
"grad_norm": 0.8493777513504028,
"learning_rate": 3.5744316446598565e-06,
"loss": 0.5477,
"step": 9538
},
{
"epoch": 0.6,
"grad_norm": 0.8486292958259583,
"learning_rate": 3.5734482661878244e-06,
"loss": 0.5877,
"step": 9539
},
{
"epoch": 0.6,
"grad_norm": 0.9552314281463623,
"learning_rate": 3.5724649477829232e-06,
"loss": 0.6654,
"step": 9540
},
{
"epoch": 0.6,
"grad_norm": 0.8565940260887146,
"learning_rate": 3.5714816894865556e-06,
"loss": 0.532,
"step": 9541
},
{
"epoch": 0.6,
"grad_norm": 0.8645039200782776,
"learning_rate": 3.570498491340124e-06,
"loss": 0.5583,
"step": 9542
},
{
"epoch": 0.6,
"grad_norm": 0.8806138038635254,
"learning_rate": 3.5695153533850302e-06,
"loss": 0.5508,
"step": 9543
},
{
"epoch": 0.6,
"grad_norm": 0.8636948466300964,
"learning_rate": 3.5685322756626683e-06,
"loss": 0.5921,
"step": 9544
},
{
"epoch": 0.6,
"grad_norm": 0.8807823657989502,
"learning_rate": 3.5675492582144322e-06,
"loss": 0.62,
"step": 9545
},
{
"epoch": 0.6,
"grad_norm": 0.85367751121521,
"learning_rate": 3.566566301081712e-06,
"loss": 0.5503,
"step": 9546
},
{
"epoch": 0.6,
"grad_norm": 0.8549711108207703,
"learning_rate": 3.5655834043059e-06,
"loss": 0.5368,
"step": 9547
},
{
"epoch": 0.6,
"grad_norm": 0.9118362069129944,
"learning_rate": 3.5646005679283813e-06,
"loss": 0.585,
"step": 9548
},
{
"epoch": 0.6,
"grad_norm": 0.8719263672828674,
"learning_rate": 3.5636177919905385e-06,
"loss": 0.5321,
"step": 9549
},
{
"epoch": 0.61,
"grad_norm": 0.8751515746116638,
"learning_rate": 3.5626350765337546e-06,
"loss": 0.6133,
"step": 9550
},
{
"epoch": 0.61,
"grad_norm": 0.8825535774230957,
"learning_rate": 3.5616524215994052e-06,
"loss": 0.5868,
"step": 9551
},
{
"epoch": 0.61,
"grad_norm": 1.0655604600906372,
"learning_rate": 3.560669827228871e-06,
"loss": 0.6187,
"step": 9552
},
{
"epoch": 0.61,
"grad_norm": 0.8702925443649292,
"learning_rate": 3.559687293463522e-06,
"loss": 0.5919,
"step": 9553
},
{
"epoch": 0.61,
"grad_norm": 0.8378567695617676,
"learning_rate": 3.5587048203447314e-06,
"loss": 0.5988,
"step": 9554
},
{
"epoch": 0.61,
"grad_norm": 0.8837084770202637,
"learning_rate": 3.557722407913865e-06,
"loss": 0.5909,
"step": 9555
},
{
"epoch": 0.61,
"grad_norm": 0.8577612042427063,
"learning_rate": 3.5567400562122934e-06,
"loss": 0.4996,
"step": 9556
},
{
"epoch": 0.61,
"grad_norm": 0.8624299764633179,
"learning_rate": 3.5557577652813758e-06,
"loss": 0.5573,
"step": 9557
},
{
"epoch": 0.61,
"grad_norm": 0.9122274518013,
"learning_rate": 3.554775535162475e-06,
"loss": 0.6072,
"step": 9558
},
{
"epoch": 0.61,
"grad_norm": 0.8915830254554749,
"learning_rate": 3.5537933658969475e-06,
"loss": 0.5344,
"step": 9559
},
{
"epoch": 0.61,
"grad_norm": 1.072354793548584,
"learning_rate": 3.5528112575261525e-06,
"loss": 0.6077,
"step": 9560
},
{
"epoch": 0.61,
"grad_norm": 0.9154992699623108,
"learning_rate": 3.5518292100914396e-06,
"loss": 0.5795,
"step": 9561
},
{
"epoch": 0.61,
"grad_norm": 0.8368164896965027,
"learning_rate": 3.550847223634162e-06,
"loss": 0.6176,
"step": 9562
},
{
"epoch": 0.61,
"grad_norm": 0.8458346724510193,
"learning_rate": 3.549865298195665e-06,
"loss": 0.5834,
"step": 9563
},
{
"epoch": 0.61,
"grad_norm": 0.8798760771751404,
"learning_rate": 3.5488834338172974e-06,
"loss": 0.5863,
"step": 9564
},
{
"epoch": 0.61,
"grad_norm": 0.9277465343475342,
"learning_rate": 3.547901630540399e-06,
"loss": 0.5814,
"step": 9565
},
{
"epoch": 0.61,
"grad_norm": 0.9117797017097473,
"learning_rate": 3.546919888406313e-06,
"loss": 0.5731,
"step": 9566
},
{
"epoch": 0.61,
"grad_norm": 0.9465237855911255,
"learning_rate": 3.5459382074563737e-06,
"loss": 0.584,
"step": 9567
},
{
"epoch": 0.61,
"grad_norm": 0.8362681269645691,
"learning_rate": 3.5449565877319175e-06,
"loss": 0.5362,
"step": 9568
},
{
"epoch": 0.61,
"grad_norm": 0.8532899022102356,
"learning_rate": 3.54397502927428e-06,
"loss": 0.5545,
"step": 9569
},
{
"epoch": 0.61,
"grad_norm": 0.8592314124107361,
"learning_rate": 3.5429935321247887e-06,
"loss": 0.6031,
"step": 9570
},
{
"epoch": 0.61,
"grad_norm": 0.9131558537483215,
"learning_rate": 3.5420120963247706e-06,
"loss": 0.5733,
"step": 9571
},
{
"epoch": 0.61,
"grad_norm": 0.9123291373252869,
"learning_rate": 3.5410307219155495e-06,
"loss": 0.5963,
"step": 9572
},
{
"epoch": 0.61,
"grad_norm": 0.9059653282165527,
"learning_rate": 3.540049408938452e-06,
"loss": 0.5867,
"step": 9573
},
{
"epoch": 0.61,
"grad_norm": 0.8649680614471436,
"learning_rate": 3.539068157434794e-06,
"loss": 0.5413,
"step": 9574
},
{
"epoch": 0.61,
"grad_norm": 0.8861904144287109,
"learning_rate": 3.538086967445894e-06,
"loss": 0.5541,
"step": 9575
},
{
"epoch": 0.61,
"grad_norm": 0.8411959409713745,
"learning_rate": 3.5371058390130643e-06,
"loss": 0.6017,
"step": 9576
},
{
"epoch": 0.61,
"grad_norm": 0.8303496837615967,
"learning_rate": 3.536124772177621e-06,
"loss": 0.5385,
"step": 9577
},
{
"epoch": 0.61,
"grad_norm": 0.8831817507743835,
"learning_rate": 3.535143766980871e-06,
"loss": 0.5553,
"step": 9578
},
{
"epoch": 0.61,
"grad_norm": 0.8853132128715515,
"learning_rate": 3.53416282346412e-06,
"loss": 0.5881,
"step": 9579
},
{
"epoch": 0.61,
"grad_norm": 0.9052870869636536,
"learning_rate": 3.533181941668675e-06,
"loss": 0.5308,
"step": 9580
},
{
"epoch": 0.61,
"grad_norm": 0.836897611618042,
"learning_rate": 3.5322011216358325e-06,
"loss": 0.5369,
"step": 9581
},
{
"epoch": 0.61,
"grad_norm": 0.9233197569847107,
"learning_rate": 3.5312203634068977e-06,
"loss": 0.6164,
"step": 9582
},
{
"epoch": 0.61,
"grad_norm": 0.883269727230072,
"learning_rate": 3.5302396670231622e-06,
"loss": 0.6196,
"step": 9583
},
{
"epoch": 0.61,
"grad_norm": 0.8604480028152466,
"learning_rate": 3.529259032525923e-06,
"loss": 0.5845,
"step": 9584
},
{
"epoch": 0.61,
"grad_norm": 0.8859973549842834,
"learning_rate": 3.5282784599564667e-06,
"loss": 0.5217,
"step": 9585
},
{
"epoch": 0.61,
"grad_norm": 0.8377750515937805,
"learning_rate": 3.5272979493560877e-06,
"loss": 0.5654,
"step": 9586
},
{
"epoch": 0.61,
"grad_norm": 0.901696503162384,
"learning_rate": 3.5263175007660676e-06,
"loss": 0.6074,
"step": 9587
},
{
"epoch": 0.61,
"grad_norm": 0.8750433325767517,
"learning_rate": 3.5253371142276915e-06,
"loss": 0.5781,
"step": 9588
},
{
"epoch": 0.61,
"grad_norm": 0.988763689994812,
"learning_rate": 3.5243567897822382e-06,
"loss": 0.6265,
"step": 9589
},
{
"epoch": 0.61,
"grad_norm": 0.9134507179260254,
"learning_rate": 3.5233765274709885e-06,
"loss": 0.5861,
"step": 9590
},
{
"epoch": 0.61,
"grad_norm": 0.8977269530296326,
"learning_rate": 3.5223963273352157e-06,
"loss": 0.6029,
"step": 9591
},
{
"epoch": 0.61,
"grad_norm": 0.9446091651916504,
"learning_rate": 3.5214161894161948e-06,
"loss": 0.6057,
"step": 9592
},
{
"epoch": 0.61,
"grad_norm": 0.9235208630561829,
"learning_rate": 3.5204361137551924e-06,
"loss": 0.5999,
"step": 9593
},
{
"epoch": 0.61,
"grad_norm": 0.9178057312965393,
"learning_rate": 3.5194561003934798e-06,
"loss": 0.5572,
"step": 9594
},
{
"epoch": 0.61,
"grad_norm": 0.886055052280426,
"learning_rate": 3.5184761493723197e-06,
"loss": 0.5808,
"step": 9595
},
{
"epoch": 0.61,
"grad_norm": 0.9092001914978027,
"learning_rate": 3.5174962607329755e-06,
"loss": 0.5728,
"step": 9596
},
{
"epoch": 0.61,
"grad_norm": 0.830111026763916,
"learning_rate": 3.516516434516707e-06,
"loss": 0.5996,
"step": 9597
},
{
"epoch": 0.61,
"grad_norm": 0.9029525518417358,
"learning_rate": 3.5155366707647686e-06,
"loss": 0.566,
"step": 9598
},
{
"epoch": 0.61,
"grad_norm": 0.9275105595588684,
"learning_rate": 3.514556969518418e-06,
"loss": 0.6466,
"step": 9599
},
{
"epoch": 0.61,
"grad_norm": 0.8675131797790527,
"learning_rate": 3.513577330818907e-06,
"loss": 0.5825,
"step": 9600
},
{
"epoch": 0.61,
"grad_norm": 0.9288623929023743,
"learning_rate": 3.512597754707484e-06,
"loss": 0.6074,
"step": 9601
},
{
"epoch": 0.61,
"grad_norm": 0.8611968159675598,
"learning_rate": 3.511618241225393e-06,
"loss": 0.586,
"step": 9602
},
{
"epoch": 0.61,
"grad_norm": 0.8688474893569946,
"learning_rate": 3.5106387904138804e-06,
"loss": 0.5534,
"step": 9603
},
{
"epoch": 0.61,
"grad_norm": 0.8907158374786377,
"learning_rate": 3.5096594023141895e-06,
"loss": 0.5984,
"step": 9604
},
{
"epoch": 0.61,
"grad_norm": 0.9105345606803894,
"learning_rate": 3.508680076967556e-06,
"loss": 0.6486,
"step": 9605
},
{
"epoch": 0.61,
"grad_norm": 0.9449050426483154,
"learning_rate": 3.507700814415215e-06,
"loss": 0.563,
"step": 9606
},
{
"epoch": 0.61,
"grad_norm": 0.9659131169319153,
"learning_rate": 3.5067216146984016e-06,
"loss": 0.6047,
"step": 9607
},
{
"epoch": 0.61,
"grad_norm": 0.8895873427391052,
"learning_rate": 3.505742477858348e-06,
"loss": 0.5464,
"step": 9608
},
{
"epoch": 0.61,
"grad_norm": 0.875363826751709,
"learning_rate": 3.50476340393628e-06,
"loss": 0.534,
"step": 9609
},
{
"epoch": 0.61,
"grad_norm": 0.8972344398498535,
"learning_rate": 3.5037843929734216e-06,
"loss": 0.5933,
"step": 9610
},
{
"epoch": 0.61,
"grad_norm": 0.9015132784843445,
"learning_rate": 3.502805445010998e-06,
"loss": 0.5126,
"step": 9611
},
{
"epoch": 0.61,
"grad_norm": 0.8442445993423462,
"learning_rate": 3.5018265600902313e-06,
"loss": 0.5688,
"step": 9612
},
{
"epoch": 0.61,
"grad_norm": 0.9495238065719604,
"learning_rate": 3.5008477382523355e-06,
"loss": 0.5862,
"step": 9613
},
{
"epoch": 0.61,
"grad_norm": 0.8787839412689209,
"learning_rate": 3.4998689795385245e-06,
"loss": 0.5882,
"step": 9614
},
{
"epoch": 0.61,
"grad_norm": 0.9087364673614502,
"learning_rate": 3.4988902839900118e-06,
"loss": 0.6195,
"step": 9615
},
{
"epoch": 0.61,
"grad_norm": 0.9128758907318115,
"learning_rate": 3.4979116516480094e-06,
"loss": 0.5856,
"step": 9616
},
{
"epoch": 0.61,
"grad_norm": 0.9483041167259216,
"learning_rate": 3.496933082553722e-06,
"loss": 0.6421,
"step": 9617
},
{
"epoch": 0.61,
"grad_norm": 0.8916476964950562,
"learning_rate": 3.495954576748353e-06,
"loss": 0.5491,
"step": 9618
},
{
"epoch": 0.61,
"grad_norm": 0.895670473575592,
"learning_rate": 3.4949761342731025e-06,
"loss": 0.6149,
"step": 9619
},
{
"epoch": 0.61,
"grad_norm": 0.9284258484840393,
"learning_rate": 3.493997755169174e-06,
"loss": 0.6185,
"step": 9620
},
{
"epoch": 0.61,
"grad_norm": 0.8784351348876953,
"learning_rate": 3.4930194394777615e-06,
"loss": 0.5884,
"step": 9621
},
{
"epoch": 0.61,
"grad_norm": 0.8456250429153442,
"learning_rate": 3.492041187240056e-06,
"loss": 0.5612,
"step": 9622
},
{
"epoch": 0.61,
"grad_norm": 0.8818730115890503,
"learning_rate": 3.49106299849725e-06,
"loss": 0.5623,
"step": 9623
},
{
"epoch": 0.61,
"grad_norm": 0.9130449891090393,
"learning_rate": 3.4900848732905348e-06,
"loss": 0.6577,
"step": 9624
},
{
"epoch": 0.61,
"grad_norm": 0.9004276990890503,
"learning_rate": 3.4891068116610914e-06,
"loss": 0.6268,
"step": 9625
},
{
"epoch": 0.61,
"grad_norm": 0.8974410891532898,
"learning_rate": 3.4881288136501036e-06,
"loss": 0.6242,
"step": 9626
},
{
"epoch": 0.61,
"grad_norm": 0.9068264961242676,
"learning_rate": 3.487150879298753e-06,
"loss": 0.5797,
"step": 9627
},
{
"epoch": 0.61,
"grad_norm": 0.8939555883407593,
"learning_rate": 3.486173008648215e-06,
"loss": 0.5922,
"step": 9628
},
{
"epoch": 0.61,
"grad_norm": 0.8704434037208557,
"learning_rate": 3.485195201739665e-06,
"loss": 0.5675,
"step": 9629
},
{
"epoch": 0.61,
"grad_norm": 0.8694623112678528,
"learning_rate": 3.4842174586142772e-06,
"loss": 0.5603,
"step": 9630
},
{
"epoch": 0.61,
"grad_norm": 0.9141775965690613,
"learning_rate": 3.4832397793132187e-06,
"loss": 0.6373,
"step": 9631
},
{
"epoch": 0.61,
"grad_norm": 0.9108834862709045,
"learning_rate": 3.4822621638776555e-06,
"loss": 0.5756,
"step": 9632
},
{
"epoch": 0.61,
"grad_norm": 0.8546727895736694,
"learning_rate": 3.4812846123487532e-06,
"loss": 0.5232,
"step": 9633
},
{
"epoch": 0.61,
"grad_norm": 0.9427882432937622,
"learning_rate": 3.4803071247676735e-06,
"loss": 0.5932,
"step": 9634
},
{
"epoch": 0.61,
"grad_norm": 0.846181333065033,
"learning_rate": 3.4793297011755746e-06,
"loss": 0.5629,
"step": 9635
},
{
"epoch": 0.61,
"grad_norm": 0.8653784394264221,
"learning_rate": 3.4783523416136096e-06,
"loss": 0.6068,
"step": 9636
},
{
"epoch": 0.61,
"grad_norm": 0.8583688139915466,
"learning_rate": 3.477375046122935e-06,
"loss": 0.583,
"step": 9637
},
{
"epoch": 0.61,
"grad_norm": 0.9034234881401062,
"learning_rate": 3.476397814744702e-06,
"loss": 0.6093,
"step": 9638
},
{
"epoch": 0.61,
"grad_norm": 0.8735195994377136,
"learning_rate": 3.4754206475200556e-06,
"loss": 0.5769,
"step": 9639
},
{
"epoch": 0.61,
"grad_norm": 0.9497086405754089,
"learning_rate": 3.4744435444901412e-06,
"loss": 0.5695,
"step": 9640
},
{
"epoch": 0.61,
"grad_norm": 0.8908088207244873,
"learning_rate": 3.473466505696103e-06,
"loss": 0.5208,
"step": 9641
},
{
"epoch": 0.61,
"grad_norm": 0.9061847925186157,
"learning_rate": 3.4724895311790806e-06,
"loss": 0.5601,
"step": 9642
},
{
"epoch": 0.61,
"grad_norm": 0.8748029470443726,
"learning_rate": 3.4715126209802104e-06,
"loss": 0.5772,
"step": 9643
},
{
"epoch": 0.61,
"grad_norm": 0.8988456130027771,
"learning_rate": 3.4705357751406256e-06,
"loss": 0.596,
"step": 9644
},
{
"epoch": 0.61,
"grad_norm": 0.8625943064689636,
"learning_rate": 3.469558993701457e-06,
"loss": 0.5393,
"step": 9645
},
{
"epoch": 0.61,
"grad_norm": 0.915705680847168,
"learning_rate": 3.468582276703838e-06,
"loss": 0.5994,
"step": 9646
},
{
"epoch": 0.61,
"grad_norm": 0.9504239559173584,
"learning_rate": 3.467605624188891e-06,
"loss": 0.5692,
"step": 9647
},
{
"epoch": 0.61,
"grad_norm": 0.8617990016937256,
"learning_rate": 3.46662903619774e-06,
"loss": 0.572,
"step": 9648
},
{
"epoch": 0.61,
"grad_norm": 0.9895144104957581,
"learning_rate": 3.4656525127715045e-06,
"loss": 0.5718,
"step": 9649
},
{
"epoch": 0.61,
"grad_norm": 0.9112670421600342,
"learning_rate": 3.464676053951307e-06,
"loss": 0.5922,
"step": 9650
},
{
"epoch": 0.61,
"grad_norm": 0.8914951682090759,
"learning_rate": 3.463699659778259e-06,
"loss": 0.5979,
"step": 9651
},
{
"epoch": 0.61,
"grad_norm": 0.9271389245986938,
"learning_rate": 3.4627233302934737e-06,
"loss": 0.6252,
"step": 9652
},
{
"epoch": 0.61,
"grad_norm": 0.9418599009513855,
"learning_rate": 3.4617470655380597e-06,
"loss": 0.5709,
"step": 9653
},
{
"epoch": 0.61,
"grad_norm": 0.8820015788078308,
"learning_rate": 3.460770865553128e-06,
"loss": 0.5428,
"step": 9654
},
{
"epoch": 0.61,
"grad_norm": 0.9147693514823914,
"learning_rate": 3.4597947303797795e-06,
"loss": 0.636,
"step": 9655
},
{
"epoch": 0.61,
"grad_norm": 0.9040268063545227,
"learning_rate": 3.458818660059117e-06,
"loss": 0.5763,
"step": 9656
},
{
"epoch": 0.61,
"grad_norm": 0.893278956413269,
"learning_rate": 3.4578426546322403e-06,
"loss": 0.6656,
"step": 9657
},
{
"epoch": 0.61,
"grad_norm": 0.8358849883079529,
"learning_rate": 3.4568667141402425e-06,
"loss": 0.5922,
"step": 9658
},
{
"epoch": 0.61,
"grad_norm": 0.889038622379303,
"learning_rate": 3.4558908386242208e-06,
"loss": 0.5502,
"step": 9659
},
{
"epoch": 0.61,
"grad_norm": 0.8911066055297852,
"learning_rate": 3.4549150281252635e-06,
"loss": 0.6117,
"step": 9660
},
{
"epoch": 0.61,
"grad_norm": 0.9033711552619934,
"learning_rate": 3.4539392826844607e-06,
"loss": 0.6014,
"step": 9661
},
{
"epoch": 0.61,
"grad_norm": 0.935605525970459,
"learning_rate": 3.4529636023428935e-06,
"loss": 0.5503,
"step": 9662
},
{
"epoch": 0.61,
"grad_norm": 0.9369493126869202,
"learning_rate": 3.4519879871416505e-06,
"loss": 0.5497,
"step": 9663
},
{
"epoch": 0.61,
"grad_norm": 0.8504288792610168,
"learning_rate": 3.451012437121806e-06,
"loss": 0.5578,
"step": 9664
},
{
"epoch": 0.61,
"grad_norm": 0.9002702832221985,
"learning_rate": 3.4500369523244414e-06,
"loss": 0.5544,
"step": 9665
},
{
"epoch": 0.61,
"grad_norm": 0.8431366086006165,
"learning_rate": 3.4490615327906264e-06,
"loss": 0.5637,
"step": 9666
},
{
"epoch": 0.61,
"grad_norm": 0.8854979872703552,
"learning_rate": 3.448086178561436e-06,
"loss": 0.5606,
"step": 9667
},
{
"epoch": 0.61,
"grad_norm": 0.9128515720367432,
"learning_rate": 3.447110889677938e-06,
"loss": 0.6465,
"step": 9668
},
{
"epoch": 0.61,
"grad_norm": 0.9017817974090576,
"learning_rate": 3.4461356661811997e-06,
"loss": 0.5738,
"step": 9669
},
{
"epoch": 0.61,
"grad_norm": 0.831354558467865,
"learning_rate": 3.4451605081122797e-06,
"loss": 0.5026,
"step": 9670
},
{
"epoch": 0.61,
"grad_norm": 0.9157218933105469,
"learning_rate": 3.4441854155122446e-06,
"loss": 0.5886,
"step": 9671
},
{
"epoch": 0.61,
"grad_norm": 0.9166000485420227,
"learning_rate": 3.443210388422148e-06,
"loss": 0.5729,
"step": 9672
},
{
"epoch": 0.61,
"grad_norm": 0.8740888833999634,
"learning_rate": 3.4422354268830473e-06,
"loss": 0.6174,
"step": 9673
},
{
"epoch": 0.61,
"grad_norm": 0.8689664006233215,
"learning_rate": 3.441260530935994e-06,
"loss": 0.5786,
"step": 9674
},
{
"epoch": 0.61,
"grad_norm": 0.9302951693534851,
"learning_rate": 3.4402857006220353e-06,
"loss": 0.6591,
"step": 9675
},
{
"epoch": 0.61,
"grad_norm": 0.9434636235237122,
"learning_rate": 3.439310935982221e-06,
"loss": 0.6147,
"step": 9676
},
{
"epoch": 0.61,
"grad_norm": 0.9156984686851501,
"learning_rate": 3.4383362370575947e-06,
"loss": 0.5605,
"step": 9677
},
{
"epoch": 0.61,
"grad_norm": 0.866672933101654,
"learning_rate": 3.4373616038891966e-06,
"loss": 0.6085,
"step": 9678
},
{
"epoch": 0.61,
"grad_norm": 0.8342917561531067,
"learning_rate": 3.4363870365180634e-06,
"loss": 0.5376,
"step": 9679
},
{
"epoch": 0.61,
"grad_norm": 0.8971079587936401,
"learning_rate": 3.435412534985234e-06,
"loss": 0.5982,
"step": 9680
},
{
"epoch": 0.61,
"grad_norm": 0.8973367810249329,
"learning_rate": 3.4344380993317404e-06,
"loss": 0.601,
"step": 9681
},
{
"epoch": 0.61,
"grad_norm": 0.9082698822021484,
"learning_rate": 3.433463729598613e-06,
"loss": 0.6087,
"step": 9682
},
{
"epoch": 0.61,
"grad_norm": 0.8526648879051208,
"learning_rate": 3.432489425826876e-06,
"loss": 0.5689,
"step": 9683
},
{
"epoch": 0.61,
"grad_norm": 0.8531283736228943,
"learning_rate": 3.431515188057557e-06,
"loss": 0.5591,
"step": 9684
},
{
"epoch": 0.61,
"grad_norm": 0.8188491463661194,
"learning_rate": 3.4305410163316788e-06,
"loss": 0.5402,
"step": 9685
},
{
"epoch": 0.61,
"grad_norm": 0.8680586218833923,
"learning_rate": 3.429566910690258e-06,
"loss": 0.5776,
"step": 9686
},
{
"epoch": 0.61,
"grad_norm": 0.9405946731567383,
"learning_rate": 3.42859287117431e-06,
"loss": 0.611,
"step": 9687
},
{
"epoch": 0.61,
"grad_norm": 0.9357609152793884,
"learning_rate": 3.42761889782485e-06,
"loss": 0.621,
"step": 9688
},
{
"epoch": 0.61,
"grad_norm": 0.9674767255783081,
"learning_rate": 3.4266449906828897e-06,
"loss": 0.5803,
"step": 9689
},
{
"epoch": 0.61,
"grad_norm": 0.9120761752128601,
"learning_rate": 3.4256711497894346e-06,
"loss": 0.5951,
"step": 9690
},
{
"epoch": 0.61,
"grad_norm": 0.8053117394447327,
"learning_rate": 3.4246973751854917e-06,
"loss": 0.5611,
"step": 9691
},
{
"epoch": 0.61,
"grad_norm": 0.9436960220336914,
"learning_rate": 3.42372366691206e-06,
"loss": 0.5907,
"step": 9692
},
{
"epoch": 0.61,
"grad_norm": 0.9401655197143555,
"learning_rate": 3.422750025010143e-06,
"loss": 0.5508,
"step": 9693
},
{
"epoch": 0.61,
"grad_norm": 0.9254014492034912,
"learning_rate": 3.421776449520735e-06,
"loss": 0.5881,
"step": 9694
},
{
"epoch": 0.61,
"grad_norm": 0.9455965757369995,
"learning_rate": 3.4208029404848315e-06,
"loss": 0.5914,
"step": 9695
},
{
"epoch": 0.61,
"grad_norm": 0.8691204786300659,
"learning_rate": 3.4198294979434207e-06,
"loss": 0.5917,
"step": 9696
},
{
"epoch": 0.61,
"grad_norm": 0.9164184927940369,
"learning_rate": 3.418856121937494e-06,
"loss": 0.5272,
"step": 9697
},
{
"epoch": 0.61,
"grad_norm": 0.857101559638977,
"learning_rate": 3.4178828125080354e-06,
"loss": 0.6098,
"step": 9698
},
{
"epoch": 0.61,
"grad_norm": 0.9042714238166809,
"learning_rate": 3.4169095696960287e-06,
"loss": 0.6457,
"step": 9699
},
{
"epoch": 0.61,
"grad_norm": 0.87809157371521,
"learning_rate": 3.4159363935424505e-06,
"loss": 0.5905,
"step": 9700
},
{
"epoch": 0.61,
"grad_norm": 0.91737961769104,
"learning_rate": 3.4149632840882817e-06,
"loss": 0.5974,
"step": 9701
},
{
"epoch": 0.61,
"grad_norm": 0.8448576331138611,
"learning_rate": 3.413990241374495e-06,
"loss": 0.5751,
"step": 9702
},
{
"epoch": 0.61,
"grad_norm": 0.8721796274185181,
"learning_rate": 3.4130172654420623e-06,
"loss": 0.5554,
"step": 9703
},
{
"epoch": 0.61,
"grad_norm": 0.9579261541366577,
"learning_rate": 3.4120443563319527e-06,
"loss": 0.5359,
"step": 9704
},
{
"epoch": 0.61,
"grad_norm": 0.9207855463027954,
"learning_rate": 3.4110715140851286e-06,
"loss": 0.5518,
"step": 9705
},
{
"epoch": 0.61,
"grad_norm": 0.9483494758605957,
"learning_rate": 3.4100987387425566e-06,
"loss": 0.6161,
"step": 9706
},
{
"epoch": 0.61,
"grad_norm": 0.9150481820106506,
"learning_rate": 3.4091260303451967e-06,
"loss": 0.5725,
"step": 9707
},
{
"epoch": 0.62,
"grad_norm": 0.8735103607177734,
"learning_rate": 3.4081533889340056e-06,
"loss": 0.5568,
"step": 9708
},
{
"epoch": 0.62,
"grad_norm": 0.8997935652732849,
"learning_rate": 3.407180814549935e-06,
"loss": 0.5814,
"step": 9709
},
{
"epoch": 0.62,
"grad_norm": 0.9537015557289124,
"learning_rate": 3.4062083072339415e-06,
"loss": 0.6278,
"step": 9710
},
{
"epoch": 0.62,
"grad_norm": 0.9637327194213867,
"learning_rate": 3.4052358670269727e-06,
"loss": 0.6356,
"step": 9711
},
{
"epoch": 0.62,
"grad_norm": 0.8522918224334717,
"learning_rate": 3.4042634939699728e-06,
"loss": 0.5623,
"step": 9712
},
{
"epoch": 0.62,
"grad_norm": 0.9028704762458801,
"learning_rate": 3.4032911881038842e-06,
"loss": 0.6201,
"step": 9713
},
{
"epoch": 0.62,
"grad_norm": 0.9436061382293701,
"learning_rate": 3.4023189494696506e-06,
"loss": 0.6674,
"step": 9714
},
{
"epoch": 0.62,
"grad_norm": 0.9471274018287659,
"learning_rate": 3.401346778108209e-06,
"loss": 0.6199,
"step": 9715
},
{
"epoch": 0.62,
"grad_norm": 0.9044203162193298,
"learning_rate": 3.4003746740604925e-06,
"loss": 0.5994,
"step": 9716
},
{
"epoch": 0.62,
"grad_norm": 0.8830143213272095,
"learning_rate": 3.399402637367433e-06,
"loss": 0.5824,
"step": 9717
},
{
"epoch": 0.62,
"grad_norm": 0.8798513412475586,
"learning_rate": 3.398430668069961e-06,
"loss": 0.5765,
"step": 9718
},
{
"epoch": 0.62,
"grad_norm": 0.9042376279830933,
"learning_rate": 3.3974587662090026e-06,
"loss": 0.6328,
"step": 9719
},
{
"epoch": 0.62,
"grad_norm": 0.9810076355934143,
"learning_rate": 3.396486931825481e-06,
"loss": 0.6196,
"step": 9720
},
{
"epoch": 0.62,
"grad_norm": 0.8763681054115295,
"learning_rate": 3.395515164960316e-06,
"loss": 0.5587,
"step": 9721
},
{
"epoch": 0.62,
"grad_norm": 0.8596003651618958,
"learning_rate": 3.394543465654424e-06,
"loss": 0.6053,
"step": 9722
},
{
"epoch": 0.62,
"grad_norm": 0.9068569540977478,
"learning_rate": 3.393571833948724e-06,
"loss": 0.5969,
"step": 9723
},
{
"epoch": 0.62,
"grad_norm": 0.8864340782165527,
"learning_rate": 3.3926002698841253e-06,
"loss": 0.5411,
"step": 9724
},
{
"epoch": 0.62,
"grad_norm": 0.8910347819328308,
"learning_rate": 3.3916287735015375e-06,
"loss": 0.5857,
"step": 9725
},
{
"epoch": 0.62,
"grad_norm": 0.8630258440971375,
"learning_rate": 3.390657344841865e-06,
"loss": 0.5274,
"step": 9726
},
{
"epoch": 0.62,
"grad_norm": 0.8938130140304565,
"learning_rate": 3.3896859839460155e-06,
"loss": 0.5875,
"step": 9727
},
{
"epoch": 0.62,
"grad_norm": 0.9419970512390137,
"learning_rate": 3.3887146908548875e-06,
"loss": 0.6277,
"step": 9728
},
{
"epoch": 0.62,
"grad_norm": 0.8735063672065735,
"learning_rate": 3.3877434656093777e-06,
"loss": 0.5373,
"step": 9729
},
{
"epoch": 0.62,
"grad_norm": 0.9380084276199341,
"learning_rate": 3.3867723082503807e-06,
"loss": 0.664,
"step": 9730
},
{
"epoch": 0.62,
"grad_norm": 0.8645982146263123,
"learning_rate": 3.385801218818792e-06,
"loss": 0.5602,
"step": 9731
},
{
"epoch": 0.62,
"grad_norm": 0.9503861665725708,
"learning_rate": 3.384830197355499e-06,
"loss": 0.629,
"step": 9732
},
{
"epoch": 0.62,
"grad_norm": 0.875836968421936,
"learning_rate": 3.383859243901385e-06,
"loss": 0.5647,
"step": 9733
},
{
"epoch": 0.62,
"grad_norm": 0.841139018535614,
"learning_rate": 3.3828883584973364e-06,
"loss": 0.5904,
"step": 9734
},
{
"epoch": 0.62,
"grad_norm": 0.9259316325187683,
"learning_rate": 3.3819175411842353e-06,
"loss": 0.6008,
"step": 9735
},
{
"epoch": 0.62,
"grad_norm": 0.8969772458076477,
"learning_rate": 3.3809467920029574e-06,
"loss": 0.5846,
"step": 9736
},
{
"epoch": 0.62,
"grad_norm": 0.8963971138000488,
"learning_rate": 3.3799761109943775e-06,
"loss": 0.5553,
"step": 9737
},
{
"epoch": 0.62,
"grad_norm": 0.8801413178443909,
"learning_rate": 3.3790054981993683e-06,
"loss": 0.5912,
"step": 9738
},
{
"epoch": 0.62,
"grad_norm": 0.9127761125564575,
"learning_rate": 3.3780349536587965e-06,
"loss": 0.5684,
"step": 9739
},
{
"epoch": 0.62,
"grad_norm": 0.8743903636932373,
"learning_rate": 3.377064477413533e-06,
"loss": 0.6024,
"step": 9740
},
{
"epoch": 0.62,
"grad_norm": 0.8177317380905151,
"learning_rate": 3.376094069504437e-06,
"loss": 0.5123,
"step": 9741
},
{
"epoch": 0.62,
"grad_norm": 0.9717310070991516,
"learning_rate": 3.3751237299723715e-06,
"loss": 0.5859,
"step": 9742
},
{
"epoch": 0.62,
"grad_norm": 0.9141378402709961,
"learning_rate": 3.3741534588581915e-06,
"loss": 0.6222,
"step": 9743
},
{
"epoch": 0.62,
"grad_norm": 0.9228180050849915,
"learning_rate": 3.3731832562027555e-06,
"loss": 0.5765,
"step": 9744
},
{
"epoch": 0.62,
"grad_norm": 0.8938828110694885,
"learning_rate": 3.372213122046912e-06,
"loss": 0.5691,
"step": 9745
},
{
"epoch": 0.62,
"grad_norm": 0.8958885669708252,
"learning_rate": 3.3712430564315124e-06,
"loss": 0.5607,
"step": 9746
},
{
"epoch": 0.62,
"grad_norm": 0.9243308305740356,
"learning_rate": 3.3702730593974e-06,
"loss": 0.5898,
"step": 9747
},
{
"epoch": 0.62,
"grad_norm": 0.9135646224021912,
"learning_rate": 3.3693031309854214e-06,
"loss": 0.5836,
"step": 9748
},
{
"epoch": 0.62,
"grad_norm": 0.8542222380638123,
"learning_rate": 3.3683332712364138e-06,
"loss": 0.5579,
"step": 9749
},
{
"epoch": 0.62,
"grad_norm": 0.9198238849639893,
"learning_rate": 3.3673634801912175e-06,
"loss": 0.5955,
"step": 9750
},
{
"epoch": 0.62,
"grad_norm": 0.8537915349006653,
"learning_rate": 3.3663937578906642e-06,
"loss": 0.5355,
"step": 9751
},
{
"epoch": 0.62,
"grad_norm": 0.8687244057655334,
"learning_rate": 3.365424104375587e-06,
"loss": 0.5622,
"step": 9752
},
{
"epoch": 0.62,
"grad_norm": 0.8461039662361145,
"learning_rate": 3.3644545196868146e-06,
"loss": 0.594,
"step": 9753
},
{
"epoch": 0.62,
"grad_norm": 0.8379154801368713,
"learning_rate": 3.3634850038651734e-06,
"loss": 0.6012,
"step": 9754
},
{
"epoch": 0.62,
"grad_norm": 0.870664119720459,
"learning_rate": 3.362515556951485e-06,
"loss": 0.552,
"step": 9755
},
{
"epoch": 0.62,
"grad_norm": 0.8928505182266235,
"learning_rate": 3.36154617898657e-06,
"loss": 0.5454,
"step": 9756
},
{
"epoch": 0.62,
"grad_norm": 0.9355548620223999,
"learning_rate": 3.360576870011246e-06,
"loss": 0.5332,
"step": 9757
},
{
"epoch": 0.62,
"grad_norm": 0.910213828086853,
"learning_rate": 3.3596076300663273e-06,
"loss": 0.5907,
"step": 9758
},
{
"epoch": 0.62,
"grad_norm": 0.8772505521774292,
"learning_rate": 3.3586384591926235e-06,
"loss": 0.5826,
"step": 9759
},
{
"epoch": 0.62,
"grad_norm": 0.932346522808075,
"learning_rate": 3.3576693574309447e-06,
"loss": 0.6092,
"step": 9760
},
{
"epoch": 0.62,
"grad_norm": 0.8593097925186157,
"learning_rate": 3.3567003248220966e-06,
"loss": 0.6045,
"step": 9761
},
{
"epoch": 0.62,
"grad_norm": 0.8780187368392944,
"learning_rate": 3.355731361406882e-06,
"loss": 0.5656,
"step": 9762
},
{
"epoch": 0.62,
"grad_norm": 0.9253993630409241,
"learning_rate": 3.354762467226098e-06,
"loss": 0.5928,
"step": 9763
},
{
"epoch": 0.62,
"grad_norm": 0.9301041960716248,
"learning_rate": 3.3537936423205435e-06,
"loss": 0.5931,
"step": 9764
},
{
"epoch": 0.62,
"grad_norm": 0.8613686561584473,
"learning_rate": 3.352824886731012e-06,
"loss": 0.528,
"step": 9765
},
{
"epoch": 0.62,
"grad_norm": 0.9040000438690186,
"learning_rate": 3.351856200498296e-06,
"loss": 0.6182,
"step": 9766
},
{
"epoch": 0.62,
"grad_norm": 0.8548308610916138,
"learning_rate": 3.3508875836631806e-06,
"loss": 0.5758,
"step": 9767
},
{
"epoch": 0.62,
"grad_norm": 0.8682760000228882,
"learning_rate": 3.3499190362664523e-06,
"loss": 0.515,
"step": 9768
},
{
"epoch": 0.62,
"grad_norm": 0.8267245292663574,
"learning_rate": 3.3489505583488925e-06,
"loss": 0.5448,
"step": 9769
},
{
"epoch": 0.62,
"grad_norm": 0.9359412789344788,
"learning_rate": 3.3479821499512823e-06,
"loss": 0.6277,
"step": 9770
},
{
"epoch": 0.62,
"grad_norm": 0.9154059290885925,
"learning_rate": 3.347013811114396e-06,
"loss": 0.6081,
"step": 9771
},
{
"epoch": 0.62,
"grad_norm": 0.8913496136665344,
"learning_rate": 3.346045541879009e-06,
"loss": 0.5149,
"step": 9772
},
{
"epoch": 0.62,
"grad_norm": 0.8516811728477478,
"learning_rate": 3.3450773422858886e-06,
"loss": 0.605,
"step": 9773
},
{
"epoch": 0.62,
"grad_norm": 0.854370653629303,
"learning_rate": 3.3441092123758055e-06,
"loss": 0.5382,
"step": 9774
},
{
"epoch": 0.62,
"grad_norm": 0.9357313513755798,
"learning_rate": 3.3431411521895228e-06,
"loss": 0.5528,
"step": 9775
},
{
"epoch": 0.62,
"grad_norm": 0.9238491058349609,
"learning_rate": 3.342173161767803e-06,
"loss": 0.6366,
"step": 9776
},
{
"epoch": 0.62,
"grad_norm": 0.8327895402908325,
"learning_rate": 3.341205241151403e-06,
"loss": 0.5646,
"step": 9777
},
{
"epoch": 0.62,
"grad_norm": 0.9005405306816101,
"learning_rate": 3.3402373903810807e-06,
"loss": 0.5825,
"step": 9778
},
{
"epoch": 0.62,
"grad_norm": 0.8644494414329529,
"learning_rate": 3.3392696094975875e-06,
"loss": 0.5506,
"step": 9779
},
{
"epoch": 0.62,
"grad_norm": 0.8813982009887695,
"learning_rate": 3.338301898541675e-06,
"loss": 0.6004,
"step": 9780
},
{
"epoch": 0.62,
"grad_norm": 0.9440781474113464,
"learning_rate": 3.337334257554086e-06,
"loss": 0.6181,
"step": 9781
},
{
"epoch": 0.62,
"grad_norm": 0.8736399412155151,
"learning_rate": 3.3363666865755708e-06,
"loss": 0.5939,
"step": 9782
},
{
"epoch": 0.62,
"grad_norm": 0.929132342338562,
"learning_rate": 3.335399185646865e-06,
"loss": 0.6048,
"step": 9783
},
{
"epoch": 0.62,
"grad_norm": 0.8123360872268677,
"learning_rate": 3.33443175480871e-06,
"loss": 0.5391,
"step": 9784
},
{
"epoch": 0.62,
"grad_norm": 0.8572057485580444,
"learning_rate": 3.3334643941018398e-06,
"loss": 0.546,
"step": 9785
},
{
"epoch": 0.62,
"grad_norm": 0.8945849537849426,
"learning_rate": 3.3324971035669844e-06,
"loss": 0.6196,
"step": 9786
},
{
"epoch": 0.62,
"grad_norm": 0.8958498239517212,
"learning_rate": 3.3315298832448762e-06,
"loss": 0.5671,
"step": 9787
},
{
"epoch": 0.62,
"grad_norm": 0.9150652885437012,
"learning_rate": 3.330562733176242e-06,
"loss": 0.5584,
"step": 9788
},
{
"epoch": 0.62,
"grad_norm": 0.9838289022445679,
"learning_rate": 3.3295956534018033e-06,
"loss": 0.5973,
"step": 9789
},
{
"epoch": 0.62,
"grad_norm": 0.9296243786811829,
"learning_rate": 3.328628643962278e-06,
"loss": 0.5697,
"step": 9790
},
{
"epoch": 0.62,
"grad_norm": 0.8552951216697693,
"learning_rate": 3.3276617048983876e-06,
"loss": 0.5475,
"step": 9791
},
{
"epoch": 0.62,
"grad_norm": 0.958833634853363,
"learning_rate": 3.326694836250847e-06,
"loss": 0.615,
"step": 9792
},
{
"epoch": 0.62,
"grad_norm": 0.9096387028694153,
"learning_rate": 3.325728038060365e-06,
"loss": 0.5835,
"step": 9793
},
{
"epoch": 0.62,
"grad_norm": 0.9228499531745911,
"learning_rate": 3.324761310367649e-06,
"loss": 0.5948,
"step": 9794
},
{
"epoch": 0.62,
"grad_norm": 0.9237566590309143,
"learning_rate": 3.3237946532134077e-06,
"loss": 0.5998,
"step": 9795
},
{
"epoch": 0.62,
"grad_norm": 0.8698447942733765,
"learning_rate": 3.322828066638343e-06,
"loss": 0.5345,
"step": 9796
},
{
"epoch": 0.62,
"grad_norm": 0.9300669431686401,
"learning_rate": 3.321861550683154e-06,
"loss": 0.6291,
"step": 9797
},
{
"epoch": 0.62,
"grad_norm": 0.9495216012001038,
"learning_rate": 3.3208951053885367e-06,
"loss": 0.6271,
"step": 9798
},
{
"epoch": 0.62,
"grad_norm": 0.8946996331214905,
"learning_rate": 3.3199287307951844e-06,
"loss": 0.5963,
"step": 9799
},
{
"epoch": 0.62,
"grad_norm": 0.8824841976165771,
"learning_rate": 3.31896242694379e-06,
"loss": 0.5869,
"step": 9800
},
{
"epoch": 0.62,
"grad_norm": 0.9500332474708557,
"learning_rate": 3.317996193875041e-06,
"loss": 0.6316,
"step": 9801
},
{
"epoch": 0.62,
"grad_norm": 0.8856709599494934,
"learning_rate": 3.3170300316296194e-06,
"loss": 0.6012,
"step": 9802
},
{
"epoch": 0.62,
"grad_norm": 0.8249387741088867,
"learning_rate": 3.3160639402482077e-06,
"loss": 0.5547,
"step": 9803
},
{
"epoch": 0.62,
"grad_norm": 0.9641744494438171,
"learning_rate": 3.3150979197714874e-06,
"loss": 0.573,
"step": 9804
},
{
"epoch": 0.62,
"grad_norm": 0.9111761450767517,
"learning_rate": 3.314131970240132e-06,
"loss": 0.6098,
"step": 9805
},
{
"epoch": 0.62,
"grad_norm": 0.8709757924079895,
"learning_rate": 3.3131660916948137e-06,
"loss": 0.6311,
"step": 9806
},
{
"epoch": 0.62,
"grad_norm": 0.8643493056297302,
"learning_rate": 3.3122002841762023e-06,
"loss": 0.6367,
"step": 9807
},
{
"epoch": 0.62,
"grad_norm": 0.9116371870040894,
"learning_rate": 3.311234547724968e-06,
"loss": 0.6311,
"step": 9808
},
{
"epoch": 0.62,
"grad_norm": 0.8542858958244324,
"learning_rate": 3.310268882381772e-06,
"loss": 0.5615,
"step": 9809
},
{
"epoch": 0.62,
"grad_norm": 0.8505398035049438,
"learning_rate": 3.3093032881872738e-06,
"loss": 0.537,
"step": 9810
},
{
"epoch": 0.62,
"grad_norm": 0.9205043315887451,
"learning_rate": 3.3083377651821314e-06,
"loss": 0.5719,
"step": 9811
},
{
"epoch": 0.62,
"grad_norm": 0.8506627082824707,
"learning_rate": 3.3073723134070033e-06,
"loss": 0.5692,
"step": 9812
},
{
"epoch": 0.62,
"grad_norm": 0.8526927828788757,
"learning_rate": 3.3064069329025394e-06,
"loss": 0.602,
"step": 9813
},
{
"epoch": 0.62,
"grad_norm": 0.8983103036880493,
"learning_rate": 3.3054416237093863e-06,
"loss": 0.6207,
"step": 9814
},
{
"epoch": 0.62,
"grad_norm": 0.8063021898269653,
"learning_rate": 3.3044763858681928e-06,
"loss": 0.5113,
"step": 9815
},
{
"epoch": 0.62,
"grad_norm": 0.9379715323448181,
"learning_rate": 3.303511219419598e-06,
"loss": 0.603,
"step": 9816
},
{
"epoch": 0.62,
"grad_norm": 0.8616729378700256,
"learning_rate": 3.3025461244042462e-06,
"loss": 0.5416,
"step": 9817
},
{
"epoch": 0.62,
"grad_norm": 0.8878458142280579,
"learning_rate": 3.3015811008627707e-06,
"loss": 0.597,
"step": 9818
},
{
"epoch": 0.62,
"grad_norm": 0.9654011726379395,
"learning_rate": 3.3006161488358084e-06,
"loss": 0.5782,
"step": 9819
},
{
"epoch": 0.62,
"grad_norm": 0.8611358404159546,
"learning_rate": 3.299651268363986e-06,
"loss": 0.5884,
"step": 9820
},
{
"epoch": 0.62,
"grad_norm": 0.8918520212173462,
"learning_rate": 3.298686459487936e-06,
"loss": 0.5348,
"step": 9821
},
{
"epoch": 0.62,
"grad_norm": 0.8771730065345764,
"learning_rate": 3.2977217222482794e-06,
"loss": 0.5562,
"step": 9822
},
{
"epoch": 0.62,
"grad_norm": 0.8952116966247559,
"learning_rate": 3.29675705668564e-06,
"loss": 0.5843,
"step": 9823
},
{
"epoch": 0.62,
"grad_norm": 0.7943683862686157,
"learning_rate": 3.2957924628406347e-06,
"loss": 0.4923,
"step": 9824
},
{
"epoch": 0.62,
"grad_norm": 0.8794922828674316,
"learning_rate": 3.2948279407538813e-06,
"loss": 0.5498,
"step": 9825
},
{
"epoch": 0.62,
"grad_norm": 0.8752865195274353,
"learning_rate": 3.2938634904659903e-06,
"loss": 0.563,
"step": 9826
},
{
"epoch": 0.62,
"grad_norm": 0.8498966097831726,
"learning_rate": 3.2928991120175747e-06,
"loss": 0.5738,
"step": 9827
},
{
"epoch": 0.62,
"grad_norm": 0.9079285860061646,
"learning_rate": 3.2919348054492363e-06,
"loss": 0.5089,
"step": 9828
},
{
"epoch": 0.62,
"grad_norm": 0.9044334292411804,
"learning_rate": 3.2909705708015834e-06,
"loss": 0.6078,
"step": 9829
},
{
"epoch": 0.62,
"grad_norm": 0.8988425135612488,
"learning_rate": 3.290006408115213e-06,
"loss": 0.6227,
"step": 9830
},
{
"epoch": 0.62,
"grad_norm": 0.914106011390686,
"learning_rate": 3.289042317430726e-06,
"loss": 0.5588,
"step": 9831
},
{
"epoch": 0.62,
"grad_norm": 0.8243867754936218,
"learning_rate": 3.2880782987887128e-06,
"loss": 0.5413,
"step": 9832
},
{
"epoch": 0.62,
"grad_norm": 0.8767701983451843,
"learning_rate": 3.2871143522297672e-06,
"loss": 0.5734,
"step": 9833
},
{
"epoch": 0.62,
"grad_norm": 0.8820706605911255,
"learning_rate": 3.286150477794479e-06,
"loss": 0.5971,
"step": 9834
},
{
"epoch": 0.62,
"grad_norm": 0.8267252445220947,
"learning_rate": 3.2851866755234324e-06,
"loss": 0.584,
"step": 9835
},
{
"epoch": 0.62,
"grad_norm": 0.8427024483680725,
"learning_rate": 3.2842229454572084e-06,
"loss": 0.571,
"step": 9836
},
{
"epoch": 0.62,
"grad_norm": 0.9389393925666809,
"learning_rate": 3.2832592876363866e-06,
"loss": 0.6084,
"step": 9837
},
{
"epoch": 0.62,
"grad_norm": 0.9303972125053406,
"learning_rate": 3.2822957021015455e-06,
"loss": 0.5381,
"step": 9838
},
{
"epoch": 0.62,
"grad_norm": 0.9479062557220459,
"learning_rate": 3.2813321888932573e-06,
"loss": 0.5407,
"step": 9839
},
{
"epoch": 0.62,
"grad_norm": 0.808933436870575,
"learning_rate": 3.2803687480520905e-06,
"loss": 0.5186,
"step": 9840
},
{
"epoch": 0.62,
"grad_norm": 0.9015218019485474,
"learning_rate": 3.279405379618613e-06,
"loss": 0.5446,
"step": 9841
},
{
"epoch": 0.62,
"grad_norm": 0.8663600087165833,
"learning_rate": 3.278442083633392e-06,
"loss": 0.5311,
"step": 9842
},
{
"epoch": 0.62,
"grad_norm": 0.94172203540802,
"learning_rate": 3.277478860136985e-06,
"loss": 0.6051,
"step": 9843
},
{
"epoch": 0.62,
"grad_norm": 0.8853269219398499,
"learning_rate": 3.276515709169951e-06,
"loss": 0.5771,
"step": 9844
},
{
"epoch": 0.62,
"grad_norm": 0.8446550965309143,
"learning_rate": 3.2755526307728447e-06,
"loss": 0.5803,
"step": 9845
},
{
"epoch": 0.62,
"grad_norm": 0.8651383519172668,
"learning_rate": 3.2745896249862166e-06,
"loss": 0.5394,
"step": 9846
},
{
"epoch": 0.62,
"grad_norm": 0.923086941242218,
"learning_rate": 3.2736266918506195e-06,
"loss": 0.6304,
"step": 9847
},
{
"epoch": 0.62,
"grad_norm": 0.9140406847000122,
"learning_rate": 3.272663831406595e-06,
"loss": 0.6575,
"step": 9848
},
{
"epoch": 0.62,
"grad_norm": 0.8631356358528137,
"learning_rate": 3.2717010436946894e-06,
"loss": 0.577,
"step": 9849
},
{
"epoch": 0.62,
"grad_norm": 1.065402626991272,
"learning_rate": 3.270738328755438e-06,
"loss": 0.6418,
"step": 9850
},
{
"epoch": 0.62,
"grad_norm": 0.9519265294075012,
"learning_rate": 3.269775686629383e-06,
"loss": 0.6477,
"step": 9851
},
{
"epoch": 0.62,
"grad_norm": 0.9092915058135986,
"learning_rate": 3.2688131173570523e-06,
"loss": 0.6198,
"step": 9852
},
{
"epoch": 0.62,
"grad_norm": 0.9024431109428406,
"learning_rate": 3.2678506209789805e-06,
"loss": 0.6181,
"step": 9853
},
{
"epoch": 0.62,
"grad_norm": 0.8769482970237732,
"learning_rate": 3.2668881975356915e-06,
"loss": 0.5536,
"step": 9854
},
{
"epoch": 0.62,
"grad_norm": 0.9238791465759277,
"learning_rate": 3.2659258470677137e-06,
"loss": 0.6364,
"step": 9855
},
{
"epoch": 0.62,
"grad_norm": 0.7877947092056274,
"learning_rate": 3.2649635696155646e-06,
"loss": 0.4979,
"step": 9856
},
{
"epoch": 0.62,
"grad_norm": 0.84283447265625,
"learning_rate": 3.2640013652197654e-06,
"loss": 0.5688,
"step": 9857
},
{
"epoch": 0.62,
"grad_norm": 0.8760327100753784,
"learning_rate": 3.263039233920827e-06,
"loss": 0.616,
"step": 9858
},
{
"epoch": 0.62,
"grad_norm": 0.9331052303314209,
"learning_rate": 3.2620771757592663e-06,
"loss": 0.5612,
"step": 9859
},
{
"epoch": 0.62,
"grad_norm": 0.9472546577453613,
"learning_rate": 3.261115190775589e-06,
"loss": 0.5579,
"step": 9860
},
{
"epoch": 0.62,
"grad_norm": 0.9176338315010071,
"learning_rate": 3.2601532790103026e-06,
"loss": 0.5493,
"step": 9861
},
{
"epoch": 0.62,
"grad_norm": 0.9220177531242371,
"learning_rate": 3.259191440503909e-06,
"loss": 0.6003,
"step": 9862
},
{
"epoch": 0.62,
"grad_norm": 0.9254795908927917,
"learning_rate": 3.258229675296907e-06,
"loss": 0.607,
"step": 9863
},
{
"epoch": 0.62,
"grad_norm": 0.9141079187393188,
"learning_rate": 3.257267983429794e-06,
"loss": 0.6196,
"step": 9864
},
{
"epoch": 0.62,
"grad_norm": 0.8655530214309692,
"learning_rate": 3.2563063649430647e-06,
"loss": 0.5738,
"step": 9865
},
{
"epoch": 0.63,
"grad_norm": 0.9000252485275269,
"learning_rate": 3.25534481987721e-06,
"loss": 0.6145,
"step": 9866
},
{
"epoch": 0.63,
"grad_norm": 0.9283547401428223,
"learning_rate": 3.2543833482727123e-06,
"loss": 0.6134,
"step": 9867
},
{
"epoch": 0.63,
"grad_norm": 0.8959989547729492,
"learning_rate": 3.2534219501700603e-06,
"loss": 0.5565,
"step": 9868
},
{
"epoch": 0.63,
"grad_norm": 0.8951772451400757,
"learning_rate": 3.252460625609736e-06,
"loss": 0.5718,
"step": 9869
},
{
"epoch": 0.63,
"grad_norm": 0.9525763988494873,
"learning_rate": 3.251499374632214e-06,
"loss": 0.5916,
"step": 9870
},
{
"epoch": 0.63,
"grad_norm": 0.8921371102333069,
"learning_rate": 3.2505381972779704e-06,
"loss": 0.5674,
"step": 9871
},
{
"epoch": 0.63,
"grad_norm": 0.8959813714027405,
"learning_rate": 3.249577093587477e-06,
"loss": 0.5605,
"step": 9872
},
{
"epoch": 0.63,
"grad_norm": 0.9102842807769775,
"learning_rate": 3.2486160636012054e-06,
"loss": 0.6122,
"step": 9873
},
{
"epoch": 0.63,
"grad_norm": 0.8645837306976318,
"learning_rate": 3.2476551073596173e-06,
"loss": 0.5871,
"step": 9874
},
{
"epoch": 0.63,
"grad_norm": 0.9292948246002197,
"learning_rate": 3.246694224903175e-06,
"loss": 0.563,
"step": 9875
},
{
"epoch": 0.63,
"grad_norm": 0.9219769835472107,
"learning_rate": 3.245733416272341e-06,
"loss": 0.5999,
"step": 9876
},
{
"epoch": 0.63,
"grad_norm": 0.9211458563804626,
"learning_rate": 3.2447726815075715e-06,
"loss": 0.5903,
"step": 9877
},
{
"epoch": 0.63,
"grad_norm": 0.8986589312553406,
"learning_rate": 3.243812020649318e-06,
"loss": 0.5675,
"step": 9878
},
{
"epoch": 0.63,
"grad_norm": 0.9727473855018616,
"learning_rate": 3.24285143373803e-06,
"loss": 0.6265,
"step": 9879
},
{
"epoch": 0.63,
"grad_norm": 0.9438537955284119,
"learning_rate": 3.241890920814154e-06,
"loss": 0.5264,
"step": 9880
},
{
"epoch": 0.63,
"grad_norm": 0.9068976640701294,
"learning_rate": 3.2409304819181377e-06,
"loss": 0.5723,
"step": 9881
},
{
"epoch": 0.63,
"grad_norm": 0.8970639705657959,
"learning_rate": 3.2399701170904197e-06,
"loss": 0.5673,
"step": 9882
},
{
"epoch": 0.63,
"grad_norm": 0.8697466850280762,
"learning_rate": 3.239009826371436e-06,
"loss": 0.5677,
"step": 9883
},
{
"epoch": 0.63,
"grad_norm": 0.8627969622612,
"learning_rate": 3.238049609801621e-06,
"loss": 0.5892,
"step": 9884
},
{
"epoch": 0.63,
"grad_norm": 0.8970100283622742,
"learning_rate": 3.2370894674214102e-06,
"loss": 0.5913,
"step": 9885
},
{
"epoch": 0.63,
"grad_norm": 0.811985969543457,
"learning_rate": 3.2361293992712295e-06,
"loss": 0.4866,
"step": 9886
},
{
"epoch": 0.63,
"grad_norm": 0.8113346099853516,
"learning_rate": 3.2351694053915027e-06,
"loss": 0.5692,
"step": 9887
},
{
"epoch": 0.63,
"grad_norm": 0.9210705757141113,
"learning_rate": 3.2342094858226514e-06,
"loss": 0.6221,
"step": 9888
},
{
"epoch": 0.63,
"grad_norm": 0.8682329654693604,
"learning_rate": 3.233249640605098e-06,
"loss": 0.5479,
"step": 9889
},
{
"epoch": 0.63,
"grad_norm": 0.8741553425788879,
"learning_rate": 3.232289869779256e-06,
"loss": 0.5479,
"step": 9890
},
{
"epoch": 0.63,
"grad_norm": 0.8522763252258301,
"learning_rate": 3.231330173385537e-06,
"loss": 0.565,
"step": 9891
},
{
"epoch": 0.63,
"grad_norm": 0.8624328374862671,
"learning_rate": 3.2303705514643537e-06,
"loss": 0.6031,
"step": 9892
},
{
"epoch": 0.63,
"grad_norm": 0.952092707157135,
"learning_rate": 3.229411004056108e-06,
"loss": 0.5751,
"step": 9893
},
{
"epoch": 0.63,
"grad_norm": 0.9104188084602356,
"learning_rate": 3.2284515312012056e-06,
"loss": 0.5759,
"step": 9894
},
{
"epoch": 0.63,
"grad_norm": 0.8913605213165283,
"learning_rate": 3.2274921329400484e-06,
"loss": 0.6126,
"step": 9895
},
{
"epoch": 0.63,
"grad_norm": 0.8454800844192505,
"learning_rate": 3.226532809313031e-06,
"loss": 0.5386,
"step": 9896
},
{
"epoch": 0.63,
"grad_norm": 0.9178531765937805,
"learning_rate": 3.2255735603605454e-06,
"loss": 0.6037,
"step": 9897
},
{
"epoch": 0.63,
"grad_norm": 0.9100960493087769,
"learning_rate": 3.2246143861229857e-06,
"loss": 0.5579,
"step": 9898
},
{
"epoch": 0.63,
"grad_norm": 0.8924016952514648,
"learning_rate": 3.223655286640739e-06,
"loss": 0.5699,
"step": 9899
},
{
"epoch": 0.63,
"grad_norm": 0.9032720327377319,
"learning_rate": 3.2226962619541885e-06,
"loss": 0.5988,
"step": 9900
},
{
"epoch": 0.63,
"grad_norm": 0.8333504796028137,
"learning_rate": 3.221737312103714e-06,
"loss": 0.5045,
"step": 9901
},
{
"epoch": 0.63,
"grad_norm": 0.8808243274688721,
"learning_rate": 3.2207784371296957e-06,
"loss": 0.6074,
"step": 9902
},
{
"epoch": 0.63,
"grad_norm": 0.906588613986969,
"learning_rate": 3.2198196370725095e-06,
"loss": 0.6131,
"step": 9903
},
{
"epoch": 0.63,
"grad_norm": 0.9039662480354309,
"learning_rate": 3.218860911972525e-06,
"loss": 0.637,
"step": 9904
},
{
"epoch": 0.63,
"grad_norm": 0.9129331707954407,
"learning_rate": 3.2179022618701093e-06,
"loss": 0.624,
"step": 9905
},
{
"epoch": 0.63,
"grad_norm": 0.904314398765564,
"learning_rate": 3.2169436868056316e-06,
"loss": 0.5856,
"step": 9906
},
{
"epoch": 0.63,
"grad_norm": 0.8561462163925171,
"learning_rate": 3.215985186819453e-06,
"loss": 0.6,
"step": 9907
},
{
"epoch": 0.63,
"grad_norm": 0.8671022653579712,
"learning_rate": 3.2150267619519326e-06,
"loss": 0.5943,
"step": 9908
},
{
"epoch": 0.63,
"grad_norm": 0.895698070526123,
"learning_rate": 3.214068412243424e-06,
"loss": 0.5647,
"step": 9909
},
{
"epoch": 0.63,
"grad_norm": 0.8691821694374084,
"learning_rate": 3.213110137734281e-06,
"loss": 0.6168,
"step": 9910
},
{
"epoch": 0.63,
"grad_norm": 0.8884726166725159,
"learning_rate": 3.2121519384648558e-06,
"loss": 0.6183,
"step": 9911
},
{
"epoch": 0.63,
"grad_norm": 0.8356814980506897,
"learning_rate": 3.211193814475494e-06,
"loss": 0.5989,
"step": 9912
},
{
"epoch": 0.63,
"grad_norm": 0.8705270290374756,
"learning_rate": 3.2102357658065357e-06,
"loss": 0.6103,
"step": 9913
},
{
"epoch": 0.63,
"grad_norm": 0.9131333827972412,
"learning_rate": 3.2092777924983224e-06,
"loss": 0.6342,
"step": 9914
},
{
"epoch": 0.63,
"grad_norm": 0.9165261387825012,
"learning_rate": 3.208319894591194e-06,
"loss": 0.6076,
"step": 9915
},
{
"epoch": 0.63,
"grad_norm": 0.858545184135437,
"learning_rate": 3.207362072125482e-06,
"loss": 0.5796,
"step": 9916
},
{
"epoch": 0.63,
"grad_norm": 0.8806081414222717,
"learning_rate": 3.2064043251415166e-06,
"loss": 0.5396,
"step": 9917
},
{
"epoch": 0.63,
"grad_norm": 0.9189614057540894,
"learning_rate": 3.2054466536796236e-06,
"loss": 0.5817,
"step": 9918
},
{
"epoch": 0.63,
"grad_norm": 0.9014858603477478,
"learning_rate": 3.2044890577801317e-06,
"loss": 0.5843,
"step": 9919
},
{
"epoch": 0.63,
"grad_norm": 0.9093121886253357,
"learning_rate": 3.2035315374833596e-06,
"loss": 0.5669,
"step": 9920
},
{
"epoch": 0.63,
"grad_norm": 0.8906499743461609,
"learning_rate": 3.2025740928296235e-06,
"loss": 0.5766,
"step": 9921
},
{
"epoch": 0.63,
"grad_norm": 0.9178594350814819,
"learning_rate": 3.201616723859241e-06,
"loss": 0.6233,
"step": 9922
},
{
"epoch": 0.63,
"grad_norm": 0.8954256772994995,
"learning_rate": 3.20065943061252e-06,
"loss": 0.6048,
"step": 9923
},
{
"epoch": 0.63,
"grad_norm": 0.8136070370674133,
"learning_rate": 3.199702213129773e-06,
"loss": 0.5394,
"step": 9924
},
{
"epoch": 0.63,
"grad_norm": 0.8871577382087708,
"learning_rate": 3.1987450714513018e-06,
"loss": 0.5906,
"step": 9925
},
{
"epoch": 0.63,
"grad_norm": 0.8907647728919983,
"learning_rate": 3.1977880056174105e-06,
"loss": 0.5741,
"step": 9926
},
{
"epoch": 0.63,
"grad_norm": 0.8544868230819702,
"learning_rate": 3.196831015668396e-06,
"loss": 0.5634,
"step": 9927
},
{
"epoch": 0.63,
"grad_norm": 0.9441981911659241,
"learning_rate": 3.195874101644555e-06,
"loss": 0.5918,
"step": 9928
},
{
"epoch": 0.63,
"grad_norm": 0.8469243049621582,
"learning_rate": 3.194917263586179e-06,
"loss": 0.5407,
"step": 9929
},
{
"epoch": 0.63,
"grad_norm": 0.8396049737930298,
"learning_rate": 3.1939605015335588e-06,
"loss": 0.5383,
"step": 9930
},
{
"epoch": 0.63,
"grad_norm": 0.8566557168960571,
"learning_rate": 3.193003815526977e-06,
"loss": 0.5878,
"step": 9931
},
{
"epoch": 0.63,
"grad_norm": 0.9029106497764587,
"learning_rate": 3.192047205606721e-06,
"loss": 0.6431,
"step": 9932
},
{
"epoch": 0.63,
"grad_norm": 0.9187177419662476,
"learning_rate": 3.1910906718130665e-06,
"loss": 0.5857,
"step": 9933
},
{
"epoch": 0.63,
"grad_norm": 0.8693289756774902,
"learning_rate": 3.1901342141862917e-06,
"loss": 0.5984,
"step": 9934
},
{
"epoch": 0.63,
"grad_norm": 0.9296219944953918,
"learning_rate": 3.1891778327666673e-06,
"loss": 0.6454,
"step": 9935
},
{
"epoch": 0.63,
"grad_norm": 0.944770097732544,
"learning_rate": 3.1882215275944673e-06,
"loss": 0.6541,
"step": 9936
},
{
"epoch": 0.63,
"grad_norm": 0.8502100706100464,
"learning_rate": 3.187265298709954e-06,
"loss": 0.5676,
"step": 9937
},
{
"epoch": 0.63,
"grad_norm": 0.8540067076683044,
"learning_rate": 3.1863091461533945e-06,
"loss": 0.5624,
"step": 9938
},
{
"epoch": 0.63,
"grad_norm": 0.8409416079521179,
"learning_rate": 3.1853530699650483e-06,
"loss": 0.5844,
"step": 9939
},
{
"epoch": 0.63,
"grad_norm": 0.858970046043396,
"learning_rate": 3.184397070185169e-06,
"loss": 0.6213,
"step": 9940
},
{
"epoch": 0.63,
"grad_norm": 0.8982256054878235,
"learning_rate": 3.183441146854014e-06,
"loss": 0.5477,
"step": 9941
},
{
"epoch": 0.63,
"grad_norm": 0.924256443977356,
"learning_rate": 3.182485300011834e-06,
"loss": 0.6534,
"step": 9942
},
{
"epoch": 0.63,
"grad_norm": 0.8575473427772522,
"learning_rate": 3.181529529698875e-06,
"loss": 0.5467,
"step": 9943
},
{
"epoch": 0.63,
"grad_norm": 0.8267804980278015,
"learning_rate": 3.1805738359553796e-06,
"loss": 0.5687,
"step": 9944
},
{
"epoch": 0.63,
"grad_norm": 0.8258667588233948,
"learning_rate": 3.1796182188215917e-06,
"loss": 0.5367,
"step": 9945
},
{
"epoch": 0.63,
"grad_norm": 0.9159985184669495,
"learning_rate": 3.1786626783377494e-06,
"loss": 0.576,
"step": 9946
},
{
"epoch": 0.63,
"grad_norm": 0.8569443225860596,
"learning_rate": 3.177707214544086e-06,
"loss": 0.5744,
"step": 9947
},
{
"epoch": 0.63,
"grad_norm": 0.8471035957336426,
"learning_rate": 3.1767518274808298e-06,
"loss": 0.5411,
"step": 9948
},
{
"epoch": 0.63,
"grad_norm": 0.8953260779380798,
"learning_rate": 3.175796517188212e-06,
"loss": 0.6003,
"step": 9949
},
{
"epoch": 0.63,
"grad_norm": 0.868668258190155,
"learning_rate": 3.174841283706459e-06,
"loss": 0.5516,
"step": 9950
},
{
"epoch": 0.63,
"grad_norm": 0.8107344508171082,
"learning_rate": 3.17388612707579e-06,
"loss": 0.5666,
"step": 9951
},
{
"epoch": 0.63,
"grad_norm": 0.9241723418235779,
"learning_rate": 3.172931047336421e-06,
"loss": 0.5836,
"step": 9952
},
{
"epoch": 0.63,
"grad_norm": 0.8751961588859558,
"learning_rate": 3.1719760445285712e-06,
"loss": 0.5113,
"step": 9953
},
{
"epoch": 0.63,
"grad_norm": 0.9166142344474792,
"learning_rate": 3.1710211186924524e-06,
"loss": 0.555,
"step": 9954
},
{
"epoch": 0.63,
"grad_norm": 0.889083981513977,
"learning_rate": 3.170066269868271e-06,
"loss": 0.5905,
"step": 9955
},
{
"epoch": 0.63,
"grad_norm": 0.905120313167572,
"learning_rate": 3.169111498096232e-06,
"loss": 0.5809,
"step": 9956
},
{
"epoch": 0.63,
"grad_norm": 0.852555513381958,
"learning_rate": 3.1681568034165383e-06,
"loss": 0.5447,
"step": 9957
},
{
"epoch": 0.63,
"grad_norm": 0.8818122148513794,
"learning_rate": 3.167202185869391e-06,
"loss": 0.5774,
"step": 9958
},
{
"epoch": 0.63,
"grad_norm": 0.9433296918869019,
"learning_rate": 3.166247645494982e-06,
"loss": 0.6099,
"step": 9959
},
{
"epoch": 0.63,
"grad_norm": 0.9000284671783447,
"learning_rate": 3.1652931823335074e-06,
"loss": 0.5622,
"step": 9960
},
{
"epoch": 0.63,
"grad_norm": 0.9485234618186951,
"learning_rate": 3.164338796425152e-06,
"loss": 0.6129,
"step": 9961
},
{
"epoch": 0.63,
"grad_norm": 0.8851210474967957,
"learning_rate": 3.163384487810106e-06,
"loss": 0.542,
"step": 9962
},
{
"epoch": 0.63,
"grad_norm": 0.8798405528068542,
"learning_rate": 3.162430256528549e-06,
"loss": 0.5844,
"step": 9963
},
{
"epoch": 0.63,
"grad_norm": 0.921736478805542,
"learning_rate": 3.161476102620663e-06,
"loss": 0.6119,
"step": 9964
},
{
"epoch": 0.63,
"grad_norm": 0.9609774947166443,
"learning_rate": 3.16052202612662e-06,
"loss": 0.5531,
"step": 9965
},
{
"epoch": 0.63,
"grad_norm": 0.8847622275352478,
"learning_rate": 3.159568027086598e-06,
"loss": 0.6304,
"step": 9966
},
{
"epoch": 0.63,
"grad_norm": 0.9161363244056702,
"learning_rate": 3.1586141055407627e-06,
"loss": 0.6271,
"step": 9967
},
{
"epoch": 0.63,
"grad_norm": 0.8306808471679688,
"learning_rate": 3.157660261529283e-06,
"loss": 0.5713,
"step": 9968
},
{
"epoch": 0.63,
"grad_norm": 0.932395875453949,
"learning_rate": 3.15670649509232e-06,
"loss": 0.5708,
"step": 9969
},
{
"epoch": 0.63,
"grad_norm": 0.890895426273346,
"learning_rate": 3.155752806270033e-06,
"loss": 0.6783,
"step": 9970
},
{
"epoch": 0.63,
"grad_norm": 0.8662253618240356,
"learning_rate": 3.1547991951025795e-06,
"loss": 0.5452,
"step": 9971
},
{
"epoch": 0.63,
"grad_norm": 0.9375318884849548,
"learning_rate": 3.153845661630115e-06,
"loss": 0.6196,
"step": 9972
},
{
"epoch": 0.63,
"grad_norm": 0.8994795680046082,
"learning_rate": 3.152892205892787e-06,
"loss": 0.5902,
"step": 9973
},
{
"epoch": 0.63,
"grad_norm": 0.890771210193634,
"learning_rate": 3.15193882793074e-06,
"loss": 0.5948,
"step": 9974
},
{
"epoch": 0.63,
"grad_norm": 0.8573660254478455,
"learning_rate": 3.150985527784122e-06,
"loss": 0.5463,
"step": 9975
},
{
"epoch": 0.63,
"grad_norm": 0.8332209587097168,
"learning_rate": 3.1500323054930715e-06,
"loss": 0.5577,
"step": 9976
},
{
"epoch": 0.63,
"grad_norm": 0.9283886551856995,
"learning_rate": 3.149079161097725e-06,
"loss": 0.5936,
"step": 9977
},
{
"epoch": 0.63,
"grad_norm": 0.8500183820724487,
"learning_rate": 3.1481260946382143e-06,
"loss": 0.5424,
"step": 9978
},
{
"epoch": 0.63,
"grad_norm": 0.8809803128242493,
"learning_rate": 3.147173106154673e-06,
"loss": 0.6419,
"step": 9979
},
{
"epoch": 0.63,
"grad_norm": 0.8598153591156006,
"learning_rate": 3.146220195687227e-06,
"loss": 0.6031,
"step": 9980
},
{
"epoch": 0.63,
"grad_norm": 0.8905846476554871,
"learning_rate": 3.145267363276e-06,
"loss": 0.5879,
"step": 9981
},
{
"epoch": 0.63,
"grad_norm": 0.8749983906745911,
"learning_rate": 3.1443146089611102e-06,
"loss": 0.566,
"step": 9982
},
{
"epoch": 0.63,
"grad_norm": 0.8304601907730103,
"learning_rate": 3.143361932782678e-06,
"loss": 0.5731,
"step": 9983
},
{
"epoch": 0.63,
"grad_norm": 0.8867066502571106,
"learning_rate": 3.142409334780817e-06,
"loss": 0.5504,
"step": 9984
},
{
"epoch": 0.63,
"grad_norm": 0.8217571377754211,
"learning_rate": 3.1414568149956366e-06,
"loss": 0.4873,
"step": 9985
},
{
"epoch": 0.63,
"grad_norm": 0.8885734677314758,
"learning_rate": 3.1405043734672436e-06,
"loss": 0.5873,
"step": 9986
},
{
"epoch": 0.63,
"grad_norm": 0.8465083241462708,
"learning_rate": 3.1395520102357413e-06,
"loss": 0.5652,
"step": 9987
},
{
"epoch": 0.63,
"grad_norm": 0.9155653119087219,
"learning_rate": 3.1385997253412336e-06,
"loss": 0.5387,
"step": 9988
},
{
"epoch": 0.63,
"grad_norm": 0.9237584471702576,
"learning_rate": 3.137647518823817e-06,
"loss": 0.5978,
"step": 9989
},
{
"epoch": 0.63,
"grad_norm": 0.9412940144538879,
"learning_rate": 3.136695390723583e-06,
"loss": 0.6437,
"step": 9990
},
{
"epoch": 0.63,
"grad_norm": 0.9255321025848389,
"learning_rate": 3.135743341080624e-06,
"loss": 0.59,
"step": 9991
},
{
"epoch": 0.63,
"grad_norm": 0.9065369367599487,
"learning_rate": 3.1347913699350286e-06,
"loss": 0.5753,
"step": 9992
},
{
"epoch": 0.63,
"grad_norm": 0.8333830237388611,
"learning_rate": 3.1338394773268805e-06,
"loss": 0.5217,
"step": 9993
},
{
"epoch": 0.63,
"grad_norm": 0.9209916591644287,
"learning_rate": 3.132887663296259e-06,
"loss": 0.6099,
"step": 9994
},
{
"epoch": 0.63,
"grad_norm": 0.9044961929321289,
"learning_rate": 3.131935927883242e-06,
"loss": 0.566,
"step": 9995
},
{
"epoch": 0.63,
"grad_norm": 0.9058372378349304,
"learning_rate": 3.1309842711279066e-06,
"loss": 0.5774,
"step": 9996
},
{
"epoch": 0.63,
"grad_norm": 0.8610040545463562,
"learning_rate": 3.130032693070322e-06,
"loss": 0.5434,
"step": 9997
},
{
"epoch": 0.63,
"grad_norm": 0.894743800163269,
"learning_rate": 3.129081193750554e-06,
"loss": 0.5637,
"step": 9998
},
{
"epoch": 0.63,
"grad_norm": 0.9117133021354675,
"learning_rate": 3.1281297732086666e-06,
"loss": 0.5844,
"step": 9999
},
{
"epoch": 0.63,
"grad_norm": 0.9719625115394592,
"learning_rate": 3.1271784314847266e-06,
"loss": 0.6132,
"step": 10000
},
{
"epoch": 0.63,
"grad_norm": 0.8312113881111145,
"learning_rate": 3.126227168618786e-06,
"loss": 0.5298,
"step": 10001
},
{
"epoch": 0.63,
"grad_norm": 0.85428786277771,
"learning_rate": 3.1252759846509013e-06,
"loss": 0.5784,
"step": 10002
},
{
"epoch": 0.63,
"grad_norm": 0.8149659633636475,
"learning_rate": 3.1243248796211234e-06,
"loss": 0.5674,
"step": 10003
},
{
"epoch": 0.63,
"grad_norm": 0.8940887451171875,
"learning_rate": 3.123373853569498e-06,
"loss": 0.5869,
"step": 10004
},
{
"epoch": 0.63,
"grad_norm": 0.9396780133247375,
"learning_rate": 3.1224229065360734e-06,
"loss": 0.5875,
"step": 10005
},
{
"epoch": 0.63,
"grad_norm": 0.8760607838630676,
"learning_rate": 3.1214720385608875e-06,
"loss": 0.6323,
"step": 10006
},
{
"epoch": 0.63,
"grad_norm": 0.9258213043212891,
"learning_rate": 3.120521249683981e-06,
"loss": 0.6057,
"step": 10007
},
{
"epoch": 0.63,
"grad_norm": 0.9452094435691833,
"learning_rate": 3.1195705399453833e-06,
"loss": 0.547,
"step": 10008
},
{
"epoch": 0.63,
"grad_norm": 0.8690341711044312,
"learning_rate": 3.118619909385131e-06,
"loss": 0.6319,
"step": 10009
},
{
"epoch": 0.63,
"grad_norm": 0.8699579238891602,
"learning_rate": 3.117669358043248e-06,
"loss": 0.568,
"step": 10010
},
{
"epoch": 0.63,
"grad_norm": 0.9703599214553833,
"learning_rate": 3.116718885959762e-06,
"loss": 0.592,
"step": 10011
},
{
"epoch": 0.63,
"grad_norm": 0.8900342583656311,
"learning_rate": 3.1157684931746902e-06,
"loss": 0.5536,
"step": 10012
},
{
"epoch": 0.63,
"grad_norm": 0.8778373003005981,
"learning_rate": 3.1148181797280543e-06,
"loss": 0.6032,
"step": 10013
},
{
"epoch": 0.63,
"grad_norm": 0.8625448942184448,
"learning_rate": 3.1138679456598654e-06,
"loss": 0.5673,
"step": 10014
},
{
"epoch": 0.63,
"grad_norm": 0.8882395029067993,
"learning_rate": 3.112917791010137e-06,
"loss": 0.6069,
"step": 10015
},
{
"epoch": 0.63,
"grad_norm": 0.8981207013130188,
"learning_rate": 3.111967715818876e-06,
"loss": 0.579,
"step": 10016
},
{
"epoch": 0.63,
"grad_norm": 0.891898512840271,
"learning_rate": 3.1110177201260845e-06,
"loss": 0.5742,
"step": 10017
},
{
"epoch": 0.63,
"grad_norm": 0.9222726225852966,
"learning_rate": 3.1100678039717665e-06,
"loss": 0.553,
"step": 10018
},
{
"epoch": 0.63,
"grad_norm": 0.8938819766044617,
"learning_rate": 3.1091179673959194e-06,
"loss": 0.5761,
"step": 10019
},
{
"epoch": 0.63,
"grad_norm": 0.9439987540245056,
"learning_rate": 3.108168210438536e-06,
"loss": 0.6038,
"step": 10020
},
{
"epoch": 0.63,
"grad_norm": 0.9559965133666992,
"learning_rate": 3.1072185331396083e-06,
"loss": 0.5841,
"step": 10021
},
{
"epoch": 0.63,
"grad_norm": 0.912056565284729,
"learning_rate": 3.106268935539123e-06,
"loss": 0.6017,
"step": 10022
},
{
"epoch": 0.64,
"grad_norm": 0.9289038181304932,
"learning_rate": 3.1053194176770662e-06,
"loss": 0.6042,
"step": 10023
},
{
"epoch": 0.64,
"grad_norm": 0.864149808883667,
"learning_rate": 3.1043699795934172e-06,
"loss": 0.549,
"step": 10024
},
{
"epoch": 0.64,
"grad_norm": 0.9083261489868164,
"learning_rate": 3.1034206213281536e-06,
"loss": 0.6446,
"step": 10025
},
{
"epoch": 0.64,
"grad_norm": 0.8593977689743042,
"learning_rate": 3.10247134292125e-06,
"loss": 0.5199,
"step": 10026
},
{
"epoch": 0.64,
"grad_norm": 0.9130897521972656,
"learning_rate": 3.1015221444126776e-06,
"loss": 0.5645,
"step": 10027
},
{
"epoch": 0.64,
"grad_norm": 0.8100042939186096,
"learning_rate": 3.1005730258424025e-06,
"loss": 0.5811,
"step": 10028
},
{
"epoch": 0.64,
"grad_norm": 0.7985337376594543,
"learning_rate": 3.099623987250391e-06,
"loss": 0.5514,
"step": 10029
},
{
"epoch": 0.64,
"grad_norm": 0.8951230049133301,
"learning_rate": 3.098675028676601e-06,
"loss": 0.6081,
"step": 10030
},
{
"epoch": 0.64,
"grad_norm": 0.844353973865509,
"learning_rate": 3.0977261501609924e-06,
"loss": 0.5776,
"step": 10031
},
{
"epoch": 0.64,
"grad_norm": 0.9215499758720398,
"learning_rate": 3.0967773517435173e-06,
"loss": 0.5944,
"step": 10032
},
{
"epoch": 0.64,
"grad_norm": 0.8491506576538086,
"learning_rate": 3.0958286334641284e-06,
"loss": 0.5225,
"step": 10033
},
{
"epoch": 0.64,
"grad_norm": 0.9156690835952759,
"learning_rate": 3.0948799953627696e-06,
"loss": 0.5782,
"step": 10034
},
{
"epoch": 0.64,
"grad_norm": 0.8732212781906128,
"learning_rate": 3.093931437479388e-06,
"loss": 0.5373,
"step": 10035
},
{
"epoch": 0.64,
"grad_norm": 0.8561059236526489,
"learning_rate": 3.092982959853923e-06,
"loss": 0.5791,
"step": 10036
},
{
"epoch": 0.64,
"grad_norm": 0.9923532605171204,
"learning_rate": 3.092034562526312e-06,
"loss": 0.619,
"step": 10037
},
{
"epoch": 0.64,
"grad_norm": 0.8949557542800903,
"learning_rate": 3.0910862455364864e-06,
"loss": 0.5534,
"step": 10038
},
{
"epoch": 0.64,
"grad_norm": 0.9672521948814392,
"learning_rate": 3.09013800892438e-06,
"loss": 0.6306,
"step": 10039
},
{
"epoch": 0.64,
"grad_norm": 0.8525355458259583,
"learning_rate": 3.0891898527299167e-06,
"loss": 0.5502,
"step": 10040
},
{
"epoch": 0.64,
"grad_norm": 0.8738742470741272,
"learning_rate": 3.088241776993024e-06,
"loss": 0.5939,
"step": 10041
},
{
"epoch": 0.64,
"grad_norm": 0.9154573082923889,
"learning_rate": 3.0872937817536165e-06,
"loss": 0.6274,
"step": 10042
},
{
"epoch": 0.64,
"grad_norm": 0.8766052722930908,
"learning_rate": 3.0863458670516157e-06,
"loss": 0.5632,
"step": 10043
},
{
"epoch": 0.64,
"grad_norm": 0.9145663976669312,
"learning_rate": 3.085398032926933e-06,
"loss": 0.5808,
"step": 10044
},
{
"epoch": 0.64,
"grad_norm": 0.9256823062896729,
"learning_rate": 3.0844502794194795e-06,
"loss": 0.6116,
"step": 10045
},
{
"epoch": 0.64,
"grad_norm": 0.8836879730224609,
"learning_rate": 3.083502606569159e-06,
"loss": 0.5633,
"step": 10046
},
{
"epoch": 0.64,
"grad_norm": 0.8442484736442566,
"learning_rate": 3.0825550144158788e-06,
"loss": 0.5449,
"step": 10047
},
{
"epoch": 0.64,
"grad_norm": 0.8978825807571411,
"learning_rate": 3.081607502999536e-06,
"loss": 0.5596,
"step": 10048
},
{
"epoch": 0.64,
"grad_norm": 0.8920104503631592,
"learning_rate": 3.0806600723600275e-06,
"loss": 0.5789,
"step": 10049
},
{
"epoch": 0.64,
"grad_norm": 0.8824292421340942,
"learning_rate": 3.0797127225372477e-06,
"loss": 0.5769,
"step": 10050
},
{
"epoch": 0.64,
"grad_norm": 0.8434662818908691,
"learning_rate": 3.078765453571082e-06,
"loss": 0.5399,
"step": 10051
},
{
"epoch": 0.64,
"grad_norm": 0.8964858055114746,
"learning_rate": 3.077818265501421e-06,
"loss": 0.5802,
"step": 10052
},
{
"epoch": 0.64,
"grad_norm": 0.8828626871109009,
"learning_rate": 3.0768711583681475e-06,
"loss": 0.5715,
"step": 10053
},
{
"epoch": 0.64,
"grad_norm": 0.8482038974761963,
"learning_rate": 3.075924132211139e-06,
"loss": 0.5171,
"step": 10054
},
{
"epoch": 0.64,
"grad_norm": 0.8848569989204407,
"learning_rate": 3.07497718707027e-06,
"loss": 0.5214,
"step": 10055
},
{
"epoch": 0.64,
"grad_norm": 0.9475182294845581,
"learning_rate": 3.074030322985416e-06,
"loss": 0.5741,
"step": 10056
},
{
"epoch": 0.64,
"grad_norm": 0.8911900520324707,
"learning_rate": 3.073083539996446e-06,
"loss": 0.5845,
"step": 10057
},
{
"epoch": 0.64,
"grad_norm": 0.8566701412200928,
"learning_rate": 3.072136838143225e-06,
"loss": 0.5495,
"step": 10058
},
{
"epoch": 0.64,
"grad_norm": 0.8495940566062927,
"learning_rate": 3.0711902174656126e-06,
"loss": 0.5523,
"step": 10059
},
{
"epoch": 0.64,
"grad_norm": 0.9519007802009583,
"learning_rate": 3.070243678003472e-06,
"loss": 0.6046,
"step": 10060
},
{
"epoch": 0.64,
"grad_norm": 0.8953449726104736,
"learning_rate": 3.069297219796658e-06,
"loss": 0.6238,
"step": 10061
},
{
"epoch": 0.64,
"grad_norm": 0.8640437126159668,
"learning_rate": 3.068350842885022e-06,
"loss": 0.5685,
"step": 10062
},
{
"epoch": 0.64,
"grad_norm": 0.9119696021080017,
"learning_rate": 3.0674045473084103e-06,
"loss": 0.5888,
"step": 10063
},
{
"epoch": 0.64,
"grad_norm": 0.8524396419525146,
"learning_rate": 3.0664583331066695e-06,
"loss": 0.521,
"step": 10064
},
{
"epoch": 0.64,
"grad_norm": 0.896168053150177,
"learning_rate": 3.0655122003196443e-06,
"loss": 0.5385,
"step": 10065
},
{
"epoch": 0.64,
"grad_norm": 0.9223374724388123,
"learning_rate": 3.06456614898717e-06,
"loss": 0.6304,
"step": 10066
},
{
"epoch": 0.64,
"grad_norm": 0.877920389175415,
"learning_rate": 3.0636201791490823e-06,
"loss": 0.6141,
"step": 10067
},
{
"epoch": 0.64,
"grad_norm": 0.8867497444152832,
"learning_rate": 3.062674290845211e-06,
"loss": 0.54,
"step": 10068
},
{
"epoch": 0.64,
"grad_norm": 0.8688225746154785,
"learning_rate": 3.061728484115388e-06,
"loss": 0.5374,
"step": 10069
},
{
"epoch": 0.64,
"grad_norm": 0.8275909423828125,
"learning_rate": 3.0607827589994353e-06,
"loss": 0.5417,
"step": 10070
},
{
"epoch": 0.64,
"grad_norm": 0.8562379479408264,
"learning_rate": 3.0598371155371747e-06,
"loss": 0.5275,
"step": 10071
},
{
"epoch": 0.64,
"grad_norm": 0.8817172646522522,
"learning_rate": 3.058891553768422e-06,
"loss": 0.5717,
"step": 10072
},
{
"epoch": 0.64,
"grad_norm": 0.9288895130157471,
"learning_rate": 3.0579460737329958e-06,
"loss": 0.61,
"step": 10073
},
{
"epoch": 0.64,
"grad_norm": 0.9470510482788086,
"learning_rate": 3.0570006754707044e-06,
"loss": 0.6149,
"step": 10074
},
{
"epoch": 0.64,
"grad_norm": 0.9388991594314575,
"learning_rate": 3.056055359021354e-06,
"loss": 0.5868,
"step": 10075
},
{
"epoch": 0.64,
"grad_norm": 0.8381592035293579,
"learning_rate": 3.0551101244247494e-06,
"loss": 0.5587,
"step": 10076
},
{
"epoch": 0.64,
"grad_norm": 0.8484103679656982,
"learning_rate": 3.0541649717206933e-06,
"loss": 0.544,
"step": 10077
},
{
"epoch": 0.64,
"grad_norm": 0.9049462080001831,
"learning_rate": 3.0532199009489814e-06,
"loss": 0.5781,
"step": 10078
},
{
"epoch": 0.64,
"grad_norm": 0.9403258562088013,
"learning_rate": 3.052274912149406e-06,
"loss": 0.5796,
"step": 10079
},
{
"epoch": 0.64,
"grad_norm": 0.8975145220756531,
"learning_rate": 3.0513300053617595e-06,
"loss": 0.564,
"step": 10080
},
{
"epoch": 0.64,
"grad_norm": 0.9125024676322937,
"learning_rate": 3.0503851806258257e-06,
"loss": 0.5705,
"step": 10081
},
{
"epoch": 0.64,
"grad_norm": 0.8600341081619263,
"learning_rate": 3.0494404379813914e-06,
"loss": 0.6273,
"step": 10082
},
{
"epoch": 0.64,
"grad_norm": 0.8747133016586304,
"learning_rate": 3.048495777468234e-06,
"loss": 0.6381,
"step": 10083
},
{
"epoch": 0.64,
"grad_norm": 0.9442613124847412,
"learning_rate": 3.047551199126131e-06,
"loss": 0.5865,
"step": 10084
},
{
"epoch": 0.64,
"grad_norm": 0.8632836937904358,
"learning_rate": 3.046606702994854e-06,
"loss": 0.6283,
"step": 10085
},
{
"epoch": 0.64,
"grad_norm": 0.8757123351097107,
"learning_rate": 3.0456622891141748e-06,
"loss": 0.5375,
"step": 10086
},
{
"epoch": 0.64,
"grad_norm": 0.8973109126091003,
"learning_rate": 3.0447179575238565e-06,
"loss": 0.6009,
"step": 10087
},
{
"epoch": 0.64,
"grad_norm": 0.9889295101165771,
"learning_rate": 3.0437737082636647e-06,
"loss": 0.6492,
"step": 10088
},
{
"epoch": 0.64,
"grad_norm": 0.8877894282341003,
"learning_rate": 3.0428295413733546e-06,
"loss": 0.4994,
"step": 10089
},
{
"epoch": 0.64,
"grad_norm": 0.9340519309043884,
"learning_rate": 3.0418854568926866e-06,
"loss": 0.5923,
"step": 10090
},
{
"epoch": 0.64,
"grad_norm": 0.8820253610610962,
"learning_rate": 3.0409414548614086e-06,
"loss": 0.5531,
"step": 10091
},
{
"epoch": 0.64,
"grad_norm": 0.8349282145500183,
"learning_rate": 3.039997535319272e-06,
"loss": 0.572,
"step": 10092
},
{
"epoch": 0.64,
"grad_norm": 0.8930419087409973,
"learning_rate": 3.039053698306019e-06,
"loss": 0.5305,
"step": 10093
},
{
"epoch": 0.64,
"grad_norm": 0.8789426684379578,
"learning_rate": 3.0381099438613948e-06,
"loss": 0.5386,
"step": 10094
},
{
"epoch": 0.64,
"grad_norm": 0.8947481513023376,
"learning_rate": 3.037166272025135e-06,
"loss": 0.5672,
"step": 10095
},
{
"epoch": 0.64,
"grad_norm": 0.8884199261665344,
"learning_rate": 3.0362226828369767e-06,
"loss": 0.6096,
"step": 10096
},
{
"epoch": 0.64,
"grad_norm": 0.885449230670929,
"learning_rate": 3.0352791763366484e-06,
"loss": 0.5831,
"step": 10097
},
{
"epoch": 0.64,
"grad_norm": 0.836551308631897,
"learning_rate": 3.0343357525638787e-06,
"loss": 0.5132,
"step": 10098
},
{
"epoch": 0.64,
"grad_norm": 0.9031782150268555,
"learning_rate": 3.0333924115583935e-06,
"loss": 0.5898,
"step": 10099
},
{
"epoch": 0.64,
"grad_norm": 0.9216272234916687,
"learning_rate": 3.032449153359913e-06,
"loss": 0.5271,
"step": 10100
},
{
"epoch": 0.64,
"grad_norm": 0.8929412961006165,
"learning_rate": 3.031505978008153e-06,
"loss": 0.5852,
"step": 10101
},
{
"epoch": 0.64,
"grad_norm": 0.884545087814331,
"learning_rate": 3.030562885542827e-06,
"loss": 0.5595,
"step": 10102
},
{
"epoch": 0.64,
"grad_norm": 0.881131649017334,
"learning_rate": 3.0296198760036493e-06,
"loss": 0.5557,
"step": 10103
},
{
"epoch": 0.64,
"grad_norm": 0.9663098454475403,
"learning_rate": 3.0286769494303237e-06,
"loss": 0.5894,
"step": 10104
},
{
"epoch": 0.64,
"grad_norm": 0.936959445476532,
"learning_rate": 3.0277341058625537e-06,
"loss": 0.5987,
"step": 10105
},
{
"epoch": 0.64,
"grad_norm": 0.8869735598564148,
"learning_rate": 3.026791345340038e-06,
"loss": 0.5943,
"step": 10106
},
{
"epoch": 0.64,
"grad_norm": 0.8177929520606995,
"learning_rate": 3.0258486679024767e-06,
"loss": 0.528,
"step": 10107
},
{
"epoch": 0.64,
"grad_norm": 0.9447188377380371,
"learning_rate": 3.0249060735895603e-06,
"loss": 0.5911,
"step": 10108
},
{
"epoch": 0.64,
"grad_norm": 0.9261248707771301,
"learning_rate": 3.0239635624409767e-06,
"loss": 0.5625,
"step": 10109
},
{
"epoch": 0.64,
"grad_norm": 0.8987361788749695,
"learning_rate": 3.0230211344964154e-06,
"loss": 0.5519,
"step": 10110
},
{
"epoch": 0.64,
"grad_norm": 0.8797249794006348,
"learning_rate": 3.0220787897955544e-06,
"loss": 0.5839,
"step": 10111
},
{
"epoch": 0.64,
"grad_norm": 0.9020108580589294,
"learning_rate": 3.021136528378077e-06,
"loss": 0.5724,
"step": 10112
},
{
"epoch": 0.64,
"grad_norm": 1.0054893493652344,
"learning_rate": 3.020194350283655e-06,
"loss": 0.6108,
"step": 10113
},
{
"epoch": 0.64,
"grad_norm": 0.9611765146255493,
"learning_rate": 3.019252255551963e-06,
"loss": 0.6548,
"step": 10114
},
{
"epoch": 0.64,
"grad_norm": 0.8305823802947998,
"learning_rate": 3.0183102442226653e-06,
"loss": 0.5477,
"step": 10115
},
{
"epoch": 0.64,
"grad_norm": 0.8949651718139648,
"learning_rate": 3.017368316335432e-06,
"loss": 0.576,
"step": 10116
},
{
"epoch": 0.64,
"grad_norm": 0.919265627861023,
"learning_rate": 3.0164264719299207e-06,
"loss": 0.585,
"step": 10117
},
{
"epoch": 0.64,
"grad_norm": 0.8549671173095703,
"learning_rate": 3.0154847110457918e-06,
"loss": 0.5094,
"step": 10118
},
{
"epoch": 0.64,
"grad_norm": 0.9418630003929138,
"learning_rate": 3.0145430337226955e-06,
"loss": 0.5906,
"step": 10119
},
{
"epoch": 0.64,
"grad_norm": 0.9273284673690796,
"learning_rate": 3.013601440000288e-06,
"loss": 0.5946,
"step": 10120
},
{
"epoch": 0.64,
"grad_norm": 0.9151654243469238,
"learning_rate": 3.0126599299182114e-06,
"loss": 0.5878,
"step": 10121
},
{
"epoch": 0.64,
"grad_norm": 0.8435792922973633,
"learning_rate": 3.0117185035161135e-06,
"loss": 0.5951,
"step": 10122
},
{
"epoch": 0.64,
"grad_norm": 0.8737865090370178,
"learning_rate": 3.01077716083363e-06,
"loss": 0.5739,
"step": 10123
},
{
"epoch": 0.64,
"grad_norm": 0.8778201937675476,
"learning_rate": 3.009835901910403e-06,
"loss": 0.5487,
"step": 10124
},
{
"epoch": 0.64,
"grad_norm": 0.862269937992096,
"learning_rate": 3.008894726786062e-06,
"loss": 0.5627,
"step": 10125
},
{
"epoch": 0.64,
"grad_norm": 0.8969505429267883,
"learning_rate": 3.007953635500238e-06,
"loss": 0.5473,
"step": 10126
},
{
"epoch": 0.64,
"grad_norm": 0.9201652407646179,
"learning_rate": 3.0070126280925564e-06,
"loss": 0.6661,
"step": 10127
},
{
"epoch": 0.64,
"grad_norm": 0.882611870765686,
"learning_rate": 3.0060717046026387e-06,
"loss": 0.6069,
"step": 10128
},
{
"epoch": 0.64,
"grad_norm": 0.9005841016769409,
"learning_rate": 3.0051308650701054e-06,
"loss": 0.6415,
"step": 10129
},
{
"epoch": 0.64,
"grad_norm": 0.8846337795257568,
"learning_rate": 3.004190109534573e-06,
"loss": 0.5779,
"step": 10130
},
{
"epoch": 0.64,
"grad_norm": 0.9538823366165161,
"learning_rate": 3.0032494380356523e-06,
"loss": 0.5801,
"step": 10131
},
{
"epoch": 0.64,
"grad_norm": 0.8762175440788269,
"learning_rate": 3.002308850612949e-06,
"loss": 0.5717,
"step": 10132
},
{
"epoch": 0.64,
"grad_norm": 0.8317214846611023,
"learning_rate": 3.001368347306073e-06,
"loss": 0.5577,
"step": 10133
},
{
"epoch": 0.64,
"grad_norm": 0.8527503609657288,
"learning_rate": 3.0004279281546235e-06,
"loss": 0.5818,
"step": 10134
},
{
"epoch": 0.64,
"grad_norm": 0.861371636390686,
"learning_rate": 2.999487593198197e-06,
"loss": 0.5475,
"step": 10135
},
{
"epoch": 0.64,
"grad_norm": 0.8559701442718506,
"learning_rate": 2.9985473424763876e-06,
"loss": 0.5565,
"step": 10136
},
{
"epoch": 0.64,
"grad_norm": 0.9402846693992615,
"learning_rate": 2.9976071760287874e-06,
"loss": 0.6107,
"step": 10137
},
{
"epoch": 0.64,
"grad_norm": 0.8749223351478577,
"learning_rate": 2.9966670938949847e-06,
"loss": 0.5484,
"step": 10138
},
{
"epoch": 0.64,
"grad_norm": 0.8214702606201172,
"learning_rate": 2.995727096114561e-06,
"loss": 0.5719,
"step": 10139
},
{
"epoch": 0.64,
"grad_norm": 0.8834431767463684,
"learning_rate": 2.9947871827270956e-06,
"loss": 0.6228,
"step": 10140
},
{
"epoch": 0.64,
"grad_norm": 0.9178330302238464,
"learning_rate": 2.993847353772168e-06,
"loss": 0.5815,
"step": 10141
},
{
"epoch": 0.64,
"grad_norm": 0.8592966198921204,
"learning_rate": 2.9929076092893496e-06,
"loss": 0.5508,
"step": 10142
},
{
"epoch": 0.64,
"grad_norm": 0.8537743091583252,
"learning_rate": 2.991967949318209e-06,
"loss": 0.6015,
"step": 10143
},
{
"epoch": 0.64,
"grad_norm": 0.8182849884033203,
"learning_rate": 2.9910283738983125e-06,
"loss": 0.5648,
"step": 10144
},
{
"epoch": 0.64,
"grad_norm": 0.9029396772384644,
"learning_rate": 2.9900888830692208e-06,
"loss": 0.6084,
"step": 10145
},
{
"epoch": 0.64,
"grad_norm": 0.8994178175926208,
"learning_rate": 2.9891494768704964e-06,
"loss": 0.6156,
"step": 10146
},
{
"epoch": 0.64,
"grad_norm": 0.8991573452949524,
"learning_rate": 2.9882101553416932e-06,
"loss": 0.5458,
"step": 10147
},
{
"epoch": 0.64,
"grad_norm": 0.89846271276474,
"learning_rate": 2.9872709185223596e-06,
"loss": 0.6052,
"step": 10148
},
{
"epoch": 0.64,
"grad_norm": 0.8613349199295044,
"learning_rate": 2.9863317664520453e-06,
"loss": 0.5582,
"step": 10149
},
{
"epoch": 0.64,
"grad_norm": 0.9185076951980591,
"learning_rate": 2.9853926991702974e-06,
"loss": 0.6023,
"step": 10150
},
{
"epoch": 0.64,
"grad_norm": 0.9413586258888245,
"learning_rate": 2.984453716716655e-06,
"loss": 0.5681,
"step": 10151
},
{
"epoch": 0.64,
"grad_norm": 0.843997061252594,
"learning_rate": 2.9835148191306535e-06,
"loss": 0.5453,
"step": 10152
},
{
"epoch": 0.64,
"grad_norm": 0.8782387375831604,
"learning_rate": 2.9825760064518273e-06,
"loss": 0.5996,
"step": 10153
},
{
"epoch": 0.64,
"grad_norm": 0.7524551749229431,
"learning_rate": 2.981637278719709e-06,
"loss": 0.5087,
"step": 10154
},
{
"epoch": 0.64,
"grad_norm": 0.8854588270187378,
"learning_rate": 2.9806986359738244e-06,
"loss": 0.5919,
"step": 10155
},
{
"epoch": 0.64,
"grad_norm": 0.8053493499755859,
"learning_rate": 2.979760078253694e-06,
"loss": 0.5397,
"step": 10156
},
{
"epoch": 0.64,
"grad_norm": 0.9677163362503052,
"learning_rate": 2.9788216055988397e-06,
"loss": 0.6133,
"step": 10157
},
{
"epoch": 0.64,
"grad_norm": 0.8523488640785217,
"learning_rate": 2.977883218048775e-06,
"loss": 0.5826,
"step": 10158
},
{
"epoch": 0.64,
"grad_norm": 0.9211286902427673,
"learning_rate": 2.9769449156430147e-06,
"loss": 0.6244,
"step": 10159
},
{
"epoch": 0.64,
"grad_norm": 0.9199965596199036,
"learning_rate": 2.9760066984210655e-06,
"loss": 0.6112,
"step": 10160
},
{
"epoch": 0.64,
"grad_norm": 0.8444145321846008,
"learning_rate": 2.975068566422434e-06,
"loss": 0.5704,
"step": 10161
},
{
"epoch": 0.64,
"grad_norm": 0.907306969165802,
"learning_rate": 2.97413051968662e-06,
"loss": 0.6156,
"step": 10162
},
{
"epoch": 0.64,
"grad_norm": 0.8991623520851135,
"learning_rate": 2.9731925582531227e-06,
"loss": 0.5905,
"step": 10163
},
{
"epoch": 0.64,
"grad_norm": 0.8663104772567749,
"learning_rate": 2.9722546821614373e-06,
"loss": 0.5704,
"step": 10164
},
{
"epoch": 0.64,
"grad_norm": 0.8777760863304138,
"learning_rate": 2.9713168914510533e-06,
"loss": 0.5416,
"step": 10165
},
{
"epoch": 0.64,
"grad_norm": 0.8857688307762146,
"learning_rate": 2.970379186161455e-06,
"loss": 0.5669,
"step": 10166
},
{
"epoch": 0.64,
"grad_norm": 0.8899209499359131,
"learning_rate": 2.96944156633213e-06,
"loss": 0.6229,
"step": 10167
},
{
"epoch": 0.64,
"grad_norm": 0.8441648483276367,
"learning_rate": 2.9685040320025583e-06,
"loss": 0.5529,
"step": 10168
},
{
"epoch": 0.64,
"grad_norm": 0.8758301138877869,
"learning_rate": 2.9675665832122146e-06,
"loss": 0.6021,
"step": 10169
},
{
"epoch": 0.64,
"grad_norm": 0.8592897057533264,
"learning_rate": 2.966629220000569e-06,
"loss": 0.5656,
"step": 10170
},
{
"epoch": 0.64,
"grad_norm": 0.8968542814254761,
"learning_rate": 2.965691942407095e-06,
"loss": 0.5639,
"step": 10171
},
{
"epoch": 0.64,
"grad_norm": 0.8699895143508911,
"learning_rate": 2.9647547504712577e-06,
"loss": 0.6159,
"step": 10172
},
{
"epoch": 0.64,
"grad_norm": 0.8662521243095398,
"learning_rate": 2.9638176442325173e-06,
"loss": 0.5801,
"step": 10173
},
{
"epoch": 0.64,
"grad_norm": 0.8635749220848083,
"learning_rate": 2.962880623730332e-06,
"loss": 0.5819,
"step": 10174
},
{
"epoch": 0.64,
"grad_norm": 0.9355505108833313,
"learning_rate": 2.9619436890041555e-06,
"loss": 0.5975,
"step": 10175
},
{
"epoch": 0.64,
"grad_norm": 0.9172835350036621,
"learning_rate": 2.961006840093442e-06,
"loss": 0.5504,
"step": 10176
},
{
"epoch": 0.64,
"grad_norm": 0.814353346824646,
"learning_rate": 2.9600700770376384e-06,
"loss": 0.551,
"step": 10177
},
{
"epoch": 0.64,
"grad_norm": 0.8739163875579834,
"learning_rate": 2.959133399876186e-06,
"loss": 0.5662,
"step": 10178
},
{
"epoch": 0.64,
"grad_norm": 0.8901175856590271,
"learning_rate": 2.958196808648525e-06,
"loss": 0.6053,
"step": 10179
},
{
"epoch": 0.64,
"grad_norm": 0.9073649644851685,
"learning_rate": 2.957260303394096e-06,
"loss": 0.5715,
"step": 10180
},
{
"epoch": 0.65,
"grad_norm": 0.8607237935066223,
"learning_rate": 2.9563238841523293e-06,
"loss": 0.5847,
"step": 10181
},
{
"epoch": 0.65,
"grad_norm": 0.8739342093467712,
"learning_rate": 2.955387550962654e-06,
"loss": 0.5199,
"step": 10182
},
{
"epoch": 0.65,
"grad_norm": 0.8706129193305969,
"learning_rate": 2.954451303864494e-06,
"loss": 0.61,
"step": 10183
},
{
"epoch": 0.65,
"grad_norm": 0.9067039489746094,
"learning_rate": 2.9535151428972762e-06,
"loss": 0.5869,
"step": 10184
},
{
"epoch": 0.65,
"grad_norm": 0.856227695941925,
"learning_rate": 2.9525790681004172e-06,
"loss": 0.5495,
"step": 10185
},
{
"epoch": 0.65,
"grad_norm": 0.8174280524253845,
"learning_rate": 2.9516430795133294e-06,
"loss": 0.5439,
"step": 10186
},
{
"epoch": 0.65,
"grad_norm": 0.9031091332435608,
"learning_rate": 2.950707177175427e-06,
"loss": 0.5668,
"step": 10187
},
{
"epoch": 0.65,
"grad_norm": 0.8566731810569763,
"learning_rate": 2.9497713611261146e-06,
"loss": 0.5512,
"step": 10188
},
{
"epoch": 0.65,
"grad_norm": 0.9484649300575256,
"learning_rate": 2.9488356314047994e-06,
"loss": 0.6265,
"step": 10189
},
{
"epoch": 0.65,
"grad_norm": 0.8249022364616394,
"learning_rate": 2.94789998805088e-06,
"loss": 0.604,
"step": 10190
},
{
"epoch": 0.65,
"grad_norm": 0.880988597869873,
"learning_rate": 2.9469644311037545e-06,
"loss": 0.6227,
"step": 10191
},
{
"epoch": 0.65,
"grad_norm": 0.9004330635070801,
"learning_rate": 2.946028960602812e-06,
"loss": 0.58,
"step": 10192
},
{
"epoch": 0.65,
"grad_norm": 0.9128255844116211,
"learning_rate": 2.9450935765874474e-06,
"loss": 0.5868,
"step": 10193
},
{
"epoch": 0.65,
"grad_norm": 0.9160966873168945,
"learning_rate": 2.9441582790970425e-06,
"loss": 0.6127,
"step": 10194
},
{
"epoch": 0.65,
"grad_norm": 0.9519477486610413,
"learning_rate": 2.9432230681709815e-06,
"loss": 0.6271,
"step": 10195
},
{
"epoch": 0.65,
"grad_norm": 0.8393691778182983,
"learning_rate": 2.942287943848641e-06,
"loss": 0.552,
"step": 10196
},
{
"epoch": 0.65,
"grad_norm": 0.9617919921875,
"learning_rate": 2.941352906169398e-06,
"loss": 0.6222,
"step": 10197
},
{
"epoch": 0.65,
"grad_norm": 0.8588807582855225,
"learning_rate": 2.9404179551726214e-06,
"loss": 0.5407,
"step": 10198
},
{
"epoch": 0.65,
"grad_norm": 0.9286124110221863,
"learning_rate": 2.939483090897681e-06,
"loss": 0.6136,
"step": 10199
},
{
"epoch": 0.65,
"grad_norm": 0.9325358867645264,
"learning_rate": 2.9385483133839386e-06,
"loss": 0.5356,
"step": 10200
},
{
"epoch": 0.65,
"grad_norm": 0.9960110187530518,
"learning_rate": 2.937613622670756e-06,
"loss": 0.6458,
"step": 10201
},
{
"epoch": 0.65,
"grad_norm": 0.8421880006790161,
"learning_rate": 2.9366790187974897e-06,
"loss": 0.5576,
"step": 10202
},
{
"epoch": 0.65,
"grad_norm": 0.8421469330787659,
"learning_rate": 2.9357445018034926e-06,
"loss": 0.5855,
"step": 10203
},
{
"epoch": 0.65,
"grad_norm": 0.8795361518859863,
"learning_rate": 2.934810071728114e-06,
"loss": 0.5877,
"step": 10204
},
{
"epoch": 0.65,
"grad_norm": 0.9030759930610657,
"learning_rate": 2.9338757286106955e-06,
"loss": 0.5878,
"step": 10205
},
{
"epoch": 0.65,
"grad_norm": 0.8403552770614624,
"learning_rate": 2.9329414724905845e-06,
"loss": 0.5818,
"step": 10206
},
{
"epoch": 0.65,
"grad_norm": 0.8796659708023071,
"learning_rate": 2.9320073034071187e-06,
"loss": 0.5653,
"step": 10207
},
{
"epoch": 0.65,
"grad_norm": 0.8549631237983704,
"learning_rate": 2.9310732213996305e-06,
"loss": 0.5763,
"step": 10208
},
{
"epoch": 0.65,
"grad_norm": 0.8520306944847107,
"learning_rate": 2.9301392265074506e-06,
"loss": 0.5754,
"step": 10209
},
{
"epoch": 0.65,
"grad_norm": 0.8692139387130737,
"learning_rate": 2.9292053187699075e-06,
"loss": 0.5272,
"step": 10210
},
{
"epoch": 0.65,
"grad_norm": 0.8986145257949829,
"learning_rate": 2.9282714982263265e-06,
"loss": 0.5251,
"step": 10211
},
{
"epoch": 0.65,
"grad_norm": 0.9022727012634277,
"learning_rate": 2.927337764916025e-06,
"loss": 0.5878,
"step": 10212
},
{
"epoch": 0.65,
"grad_norm": 0.8935984969139099,
"learning_rate": 2.926404118878319e-06,
"loss": 0.6037,
"step": 10213
},
{
"epoch": 0.65,
"grad_norm": 0.8796955347061157,
"learning_rate": 2.925470560152522e-06,
"loss": 0.5815,
"step": 10214
},
{
"epoch": 0.65,
"grad_norm": 0.8789433240890503,
"learning_rate": 2.924537088777944e-06,
"loss": 0.6031,
"step": 10215
},
{
"epoch": 0.65,
"grad_norm": 0.8689199686050415,
"learning_rate": 2.9236037047938894e-06,
"loss": 0.5566,
"step": 10216
},
{
"epoch": 0.65,
"grad_norm": 0.850175678730011,
"learning_rate": 2.922670408239657e-06,
"loss": 0.5817,
"step": 10217
},
{
"epoch": 0.65,
"grad_norm": 0.8795483112335205,
"learning_rate": 2.921737199154549e-06,
"loss": 0.6259,
"step": 10218
},
{
"epoch": 0.65,
"grad_norm": 0.8465956449508667,
"learning_rate": 2.920804077577859e-06,
"loss": 0.5467,
"step": 10219
},
{
"epoch": 0.65,
"grad_norm": 0.8541370630264282,
"learning_rate": 2.919871043548875e-06,
"loss": 0.5509,
"step": 10220
},
{
"epoch": 0.65,
"grad_norm": 0.8528336882591248,
"learning_rate": 2.9189380971068864e-06,
"loss": 0.5237,
"step": 10221
},
{
"epoch": 0.65,
"grad_norm": 1.0272489786148071,
"learning_rate": 2.918005238291172e-06,
"loss": 0.5948,
"step": 10222
},
{
"epoch": 0.65,
"grad_norm": 0.8642032742500305,
"learning_rate": 2.9170724671410155e-06,
"loss": 0.605,
"step": 10223
},
{
"epoch": 0.65,
"grad_norm": 0.8577390313148499,
"learning_rate": 2.916139783695694e-06,
"loss": 0.5634,
"step": 10224
},
{
"epoch": 0.65,
"grad_norm": 0.935626208782196,
"learning_rate": 2.9152071879944743e-06,
"loss": 0.5815,
"step": 10225
},
{
"epoch": 0.65,
"grad_norm": 0.9437475800514221,
"learning_rate": 2.914274680076628e-06,
"loss": 0.6359,
"step": 10226
},
{
"epoch": 0.65,
"grad_norm": 0.8789603114128113,
"learning_rate": 2.913342259981419e-06,
"loss": 0.6031,
"step": 10227
},
{
"epoch": 0.65,
"grad_norm": 0.9114549160003662,
"learning_rate": 2.9124099277481088e-06,
"loss": 0.544,
"step": 10228
},
{
"epoch": 0.65,
"grad_norm": 0.8359835743904114,
"learning_rate": 2.9114776834159563e-06,
"loss": 0.5686,
"step": 10229
},
{
"epoch": 0.65,
"grad_norm": 0.9180512428283691,
"learning_rate": 2.910545527024209e-06,
"loss": 0.58,
"step": 10230
},
{
"epoch": 0.65,
"grad_norm": 0.9041998386383057,
"learning_rate": 2.9096134586121227e-06,
"loss": 0.6303,
"step": 10231
},
{
"epoch": 0.65,
"grad_norm": 0.8931963443756104,
"learning_rate": 2.908681478218944e-06,
"loss": 0.5667,
"step": 10232
},
{
"epoch": 0.65,
"grad_norm": 0.8502830266952515,
"learning_rate": 2.907749585883911e-06,
"loss": 0.5689,
"step": 10233
},
{
"epoch": 0.65,
"grad_norm": 0.8675402998924255,
"learning_rate": 2.906817781646264e-06,
"loss": 0.5585,
"step": 10234
},
{
"epoch": 0.65,
"grad_norm": 0.9389364719390869,
"learning_rate": 2.905886065545239e-06,
"loss": 0.6007,
"step": 10235
},
{
"epoch": 0.65,
"grad_norm": 0.8219680786132812,
"learning_rate": 2.9049544376200674e-06,
"loss": 0.5835,
"step": 10236
},
{
"epoch": 0.65,
"grad_norm": 0.9516189098358154,
"learning_rate": 2.9040228979099777e-06,
"loss": 0.5689,
"step": 10237
},
{
"epoch": 0.65,
"grad_norm": 0.8465138077735901,
"learning_rate": 2.9030914464541904e-06,
"loss": 0.5335,
"step": 10238
},
{
"epoch": 0.65,
"grad_norm": 0.8579193949699402,
"learning_rate": 2.902160083291926e-06,
"loss": 0.5572,
"step": 10239
},
{
"epoch": 0.65,
"grad_norm": 0.9451611042022705,
"learning_rate": 2.9012288084624065e-06,
"loss": 0.5445,
"step": 10240
},
{
"epoch": 0.65,
"grad_norm": 0.8656702637672424,
"learning_rate": 2.9002976220048383e-06,
"loss": 0.5438,
"step": 10241
},
{
"epoch": 0.65,
"grad_norm": 0.8685592412948608,
"learning_rate": 2.899366523958434e-06,
"loss": 0.6194,
"step": 10242
},
{
"epoch": 0.65,
"grad_norm": 0.8590168356895447,
"learning_rate": 2.898435514362397e-06,
"loss": 0.5964,
"step": 10243
},
{
"epoch": 0.65,
"grad_norm": 0.8453319668769836,
"learning_rate": 2.89750459325593e-06,
"loss": 0.5445,
"step": 10244
},
{
"epoch": 0.65,
"grad_norm": 0.8947049379348755,
"learning_rate": 2.896573760678232e-06,
"loss": 0.6004,
"step": 10245
},
{
"epoch": 0.65,
"grad_norm": 0.8810886144638062,
"learning_rate": 2.8956430166684945e-06,
"loss": 0.5597,
"step": 10246
},
{
"epoch": 0.65,
"grad_norm": 0.9045408964157104,
"learning_rate": 2.8947123612659068e-06,
"loss": 0.5975,
"step": 10247
},
{
"epoch": 0.65,
"grad_norm": 0.907370388507843,
"learning_rate": 2.8937817945096614e-06,
"loss": 0.546,
"step": 10248
},
{
"epoch": 0.65,
"grad_norm": 0.929260790348053,
"learning_rate": 2.8928513164389353e-06,
"loss": 0.6313,
"step": 10249
},
{
"epoch": 0.65,
"grad_norm": 0.8894972205162048,
"learning_rate": 2.8919209270929106e-06,
"loss": 0.6308,
"step": 10250
},
{
"epoch": 0.65,
"grad_norm": 0.8753820061683655,
"learning_rate": 2.8909906265107647e-06,
"loss": 0.5576,
"step": 10251
},
{
"epoch": 0.65,
"grad_norm": 0.9265826940536499,
"learning_rate": 2.890060414731662e-06,
"loss": 0.5626,
"step": 10252
},
{
"epoch": 0.65,
"grad_norm": 0.9352290034294128,
"learning_rate": 2.8891302917947794e-06,
"loss": 0.628,
"step": 10253
},
{
"epoch": 0.65,
"grad_norm": 0.9359737038612366,
"learning_rate": 2.8882002577392752e-06,
"loss": 0.6278,
"step": 10254
},
{
"epoch": 0.65,
"grad_norm": 0.9087960124015808,
"learning_rate": 2.8872703126043116e-06,
"loss": 0.6675,
"step": 10255
},
{
"epoch": 0.65,
"grad_norm": 0.9556131958961487,
"learning_rate": 2.8863404564290455e-06,
"loss": 0.5625,
"step": 10256
},
{
"epoch": 0.65,
"grad_norm": 0.8998469710350037,
"learning_rate": 2.88541068925263e-06,
"loss": 0.6218,
"step": 10257
},
{
"epoch": 0.65,
"grad_norm": 0.8599625825881958,
"learning_rate": 2.8844810111142143e-06,
"loss": 0.5521,
"step": 10258
},
{
"epoch": 0.65,
"grad_norm": 0.8799909353256226,
"learning_rate": 2.883551422052946e-06,
"loss": 0.5713,
"step": 10259
},
{
"epoch": 0.65,
"grad_norm": 0.864239513874054,
"learning_rate": 2.8826219221079597e-06,
"loss": 0.6036,
"step": 10260
},
{
"epoch": 0.65,
"grad_norm": 0.8341729044914246,
"learning_rate": 2.8816925113184034e-06,
"loss": 0.5587,
"step": 10261
},
{
"epoch": 0.65,
"grad_norm": 0.8841572403907776,
"learning_rate": 2.8807631897234045e-06,
"loss": 0.6003,
"step": 10262
},
{
"epoch": 0.65,
"grad_norm": 0.9406521320343018,
"learning_rate": 2.8798339573620953e-06,
"loss": 0.6259,
"step": 10263
},
{
"epoch": 0.65,
"grad_norm": 0.8605220913887024,
"learning_rate": 2.8789048142736026e-06,
"loss": 0.5397,
"step": 10264
},
{
"epoch": 0.65,
"grad_norm": 0.9191677570343018,
"learning_rate": 2.8779757604970495e-06,
"loss": 0.5754,
"step": 10265
},
{
"epoch": 0.65,
"grad_norm": 0.8478958010673523,
"learning_rate": 2.877046796071554e-06,
"loss": 0.5911,
"step": 10266
},
{
"epoch": 0.65,
"grad_norm": 0.909317135810852,
"learning_rate": 2.8761179210362365e-06,
"loss": 0.5999,
"step": 10267
},
{
"epoch": 0.65,
"grad_norm": 0.9130200743675232,
"learning_rate": 2.8751891354302018e-06,
"loss": 0.6098,
"step": 10268
},
{
"epoch": 0.65,
"grad_norm": 0.8185581564903259,
"learning_rate": 2.8742604392925587e-06,
"loss": 0.5674,
"step": 10269
},
{
"epoch": 0.65,
"grad_norm": 0.8762167692184448,
"learning_rate": 2.8733318326624182e-06,
"loss": 0.5917,
"step": 10270
},
{
"epoch": 0.65,
"grad_norm": 0.852927029132843,
"learning_rate": 2.8724033155788743e-06,
"loss": 0.587,
"step": 10271
},
{
"epoch": 0.65,
"grad_norm": 0.8949410915374756,
"learning_rate": 2.871474888081025e-06,
"loss": 0.6095,
"step": 10272
},
{
"epoch": 0.65,
"grad_norm": 0.8751702904701233,
"learning_rate": 2.870546550207964e-06,
"loss": 0.5567,
"step": 10273
},
{
"epoch": 0.65,
"grad_norm": 0.9688418507575989,
"learning_rate": 2.8696183019987796e-06,
"loss": 0.559,
"step": 10274
},
{
"epoch": 0.65,
"grad_norm": 0.9164302945137024,
"learning_rate": 2.868690143492559e-06,
"loss": 0.6014,
"step": 10275
},
{
"epoch": 0.65,
"grad_norm": 0.9164918065071106,
"learning_rate": 2.8677620747283807e-06,
"loss": 0.5787,
"step": 10276
},
{
"epoch": 0.65,
"grad_norm": 0.8945170044898987,
"learning_rate": 2.8668340957453224e-06,
"loss": 0.5649,
"step": 10277
},
{
"epoch": 0.65,
"grad_norm": 0.8914811015129089,
"learning_rate": 2.865906206582463e-06,
"loss": 0.5866,
"step": 10278
},
{
"epoch": 0.65,
"grad_norm": 0.8111115097999573,
"learning_rate": 2.8649784072788668e-06,
"loss": 0.534,
"step": 10279
},
{
"epoch": 0.65,
"grad_norm": 0.9475454092025757,
"learning_rate": 2.8640506978736027e-06,
"loss": 0.6491,
"step": 10280
},
{
"epoch": 0.65,
"grad_norm": 0.9642074704170227,
"learning_rate": 2.8631230784057362e-06,
"loss": 0.6173,
"step": 10281
},
{
"epoch": 0.65,
"grad_norm": 0.9231216907501221,
"learning_rate": 2.862195548914318e-06,
"loss": 0.6038,
"step": 10282
},
{
"epoch": 0.65,
"grad_norm": 0.9643025994300842,
"learning_rate": 2.8612681094384135e-06,
"loss": 0.5809,
"step": 10283
},
{
"epoch": 0.65,
"grad_norm": 0.8661615252494812,
"learning_rate": 2.8603407600170664e-06,
"loss": 0.5797,
"step": 10284
},
{
"epoch": 0.65,
"grad_norm": 0.8539398908615112,
"learning_rate": 2.8594135006893264e-06,
"loss": 0.595,
"step": 10285
},
{
"epoch": 0.65,
"grad_norm": 0.8886363506317139,
"learning_rate": 2.858486331494238e-06,
"loss": 0.5977,
"step": 10286
},
{
"epoch": 0.65,
"grad_norm": 0.8894230127334595,
"learning_rate": 2.8575592524708397e-06,
"loss": 0.5999,
"step": 10287
},
{
"epoch": 0.65,
"grad_norm": 0.8313820362091064,
"learning_rate": 2.856632263658169e-06,
"loss": 0.5703,
"step": 10288
},
{
"epoch": 0.65,
"grad_norm": 0.8702353239059448,
"learning_rate": 2.855705365095258e-06,
"loss": 0.6152,
"step": 10289
},
{
"epoch": 0.65,
"grad_norm": 0.8346042037010193,
"learning_rate": 2.854778556821132e-06,
"loss": 0.5277,
"step": 10290
},
{
"epoch": 0.65,
"grad_norm": 0.9115665555000305,
"learning_rate": 2.8538518388748214e-06,
"loss": 0.595,
"step": 10291
},
{
"epoch": 0.65,
"grad_norm": 0.9286834001541138,
"learning_rate": 2.8529252112953434e-06,
"loss": 0.6031,
"step": 10292
},
{
"epoch": 0.65,
"grad_norm": 0.9434182047843933,
"learning_rate": 2.8519986741217144e-06,
"loss": 0.5983,
"step": 10293
},
{
"epoch": 0.65,
"grad_norm": 0.8886797428131104,
"learning_rate": 2.8510722273929486e-06,
"loss": 0.595,
"step": 10294
},
{
"epoch": 0.65,
"grad_norm": 0.868736207485199,
"learning_rate": 2.8501458711480564e-06,
"loss": 0.5769,
"step": 10295
},
{
"epoch": 0.65,
"grad_norm": 0.8849626183509827,
"learning_rate": 2.8492196054260424e-06,
"loss": 0.6066,
"step": 10296
},
{
"epoch": 0.65,
"grad_norm": 0.860435426235199,
"learning_rate": 2.848293430265911e-06,
"loss": 0.5701,
"step": 10297
},
{
"epoch": 0.65,
"grad_norm": 0.9047563672065735,
"learning_rate": 2.8473673457066564e-06,
"loss": 0.5482,
"step": 10298
},
{
"epoch": 0.65,
"grad_norm": 0.8450853824615479,
"learning_rate": 2.8464413517872737e-06,
"loss": 0.5659,
"step": 10299
},
{
"epoch": 0.65,
"grad_norm": 0.8788303732872009,
"learning_rate": 2.845515448546754e-06,
"loss": 0.5781,
"step": 10300
},
{
"epoch": 0.65,
"grad_norm": 0.8010481595993042,
"learning_rate": 2.8445896360240845e-06,
"loss": 0.5364,
"step": 10301
},
{
"epoch": 0.65,
"grad_norm": 0.9223700761795044,
"learning_rate": 2.843663914258249e-06,
"loss": 0.5826,
"step": 10302
},
{
"epoch": 0.65,
"grad_norm": 0.8434270024299622,
"learning_rate": 2.8427382832882207e-06,
"loss": 0.5676,
"step": 10303
},
{
"epoch": 0.65,
"grad_norm": 0.9163960218429565,
"learning_rate": 2.8418127431529807e-06,
"loss": 0.5913,
"step": 10304
},
{
"epoch": 0.65,
"grad_norm": 0.8485933542251587,
"learning_rate": 2.8408872938915e-06,
"loss": 0.5494,
"step": 10305
},
{
"epoch": 0.65,
"grad_norm": 0.9408286213874817,
"learning_rate": 2.8399619355427427e-06,
"loss": 0.6158,
"step": 10306
},
{
"epoch": 0.65,
"grad_norm": 0.8759029507637024,
"learning_rate": 2.839036668145674e-06,
"loss": 0.6119,
"step": 10307
},
{
"epoch": 0.65,
"grad_norm": 0.8358346819877625,
"learning_rate": 2.8381114917392538e-06,
"loss": 0.5738,
"step": 10308
},
{
"epoch": 0.65,
"grad_norm": 0.8680429458618164,
"learning_rate": 2.8371864063624375e-06,
"loss": 0.5452,
"step": 10309
},
{
"epoch": 0.65,
"grad_norm": 0.9013274312019348,
"learning_rate": 2.836261412054181e-06,
"loss": 0.5305,
"step": 10310
},
{
"epoch": 0.65,
"grad_norm": 0.8434852361679077,
"learning_rate": 2.8353365088534247e-06,
"loss": 0.6082,
"step": 10311
},
{
"epoch": 0.65,
"grad_norm": 0.8728095889091492,
"learning_rate": 2.8344116967991197e-06,
"loss": 0.549,
"step": 10312
},
{
"epoch": 0.65,
"grad_norm": 0.8872493505477905,
"learning_rate": 2.8334869759302064e-06,
"loss": 0.5777,
"step": 10313
},
{
"epoch": 0.65,
"grad_norm": 0.8925797343254089,
"learning_rate": 2.8325623462856176e-06,
"loss": 0.5751,
"step": 10314
},
{
"epoch": 0.65,
"grad_norm": 0.903728187084198,
"learning_rate": 2.8316378079042887e-06,
"loss": 0.6265,
"step": 10315
},
{
"epoch": 0.65,
"grad_norm": 0.8824670910835266,
"learning_rate": 2.8307133608251486e-06,
"loss": 0.5769,
"step": 10316
},
{
"epoch": 0.65,
"grad_norm": 0.8991369605064392,
"learning_rate": 2.8297890050871222e-06,
"loss": 0.5767,
"step": 10317
},
{
"epoch": 0.65,
"grad_norm": 0.8974249362945557,
"learning_rate": 2.8288647407291337e-06,
"loss": 0.6057,
"step": 10318
},
{
"epoch": 0.65,
"grad_norm": 0.859311580657959,
"learning_rate": 2.827940567790096e-06,
"loss": 0.5939,
"step": 10319
},
{
"epoch": 0.65,
"grad_norm": 0.9485636949539185,
"learning_rate": 2.8270164863089227e-06,
"loss": 0.6018,
"step": 10320
},
{
"epoch": 0.65,
"grad_norm": 0.8993692398071289,
"learning_rate": 2.82609249632453e-06,
"loss": 0.5957,
"step": 10321
},
{
"epoch": 0.65,
"grad_norm": 0.9110742807388306,
"learning_rate": 2.825168597875818e-06,
"loss": 0.5788,
"step": 10322
},
{
"epoch": 0.65,
"grad_norm": 0.9139736890792847,
"learning_rate": 2.82424479100169e-06,
"loss": 0.5432,
"step": 10323
},
{
"epoch": 0.65,
"grad_norm": 0.909750759601593,
"learning_rate": 2.8233210757410454e-06,
"loss": 0.6235,
"step": 10324
},
{
"epoch": 0.65,
"grad_norm": 0.8736597299575806,
"learning_rate": 2.8223974521327787e-06,
"loss": 0.5876,
"step": 10325
},
{
"epoch": 0.65,
"grad_norm": 0.886572003364563,
"learning_rate": 2.8214739202157794e-06,
"loss": 0.581,
"step": 10326
},
{
"epoch": 0.65,
"grad_norm": 0.8689284920692444,
"learning_rate": 2.820550480028937e-06,
"loss": 0.5974,
"step": 10327
},
{
"epoch": 0.65,
"grad_norm": 0.9559029936790466,
"learning_rate": 2.81962713161113e-06,
"loss": 0.6199,
"step": 10328
},
{
"epoch": 0.65,
"grad_norm": 0.854682445526123,
"learning_rate": 2.8187038750012396e-06,
"loss": 0.5861,
"step": 10329
},
{
"epoch": 0.65,
"grad_norm": 0.8388245105743408,
"learning_rate": 2.8177807102381404e-06,
"loss": 0.5608,
"step": 10330
},
{
"epoch": 0.65,
"grad_norm": 0.8935778737068176,
"learning_rate": 2.816857637360705e-06,
"loss": 0.5666,
"step": 10331
},
{
"epoch": 0.65,
"grad_norm": 0.876492440700531,
"learning_rate": 2.8159346564078006e-06,
"loss": 0.5852,
"step": 10332
},
{
"epoch": 0.65,
"grad_norm": 0.9023503661155701,
"learning_rate": 2.815011767418287e-06,
"loss": 0.6174,
"step": 10333
},
{
"epoch": 0.65,
"grad_norm": 0.9186480045318604,
"learning_rate": 2.8140889704310287e-06,
"loss": 0.5975,
"step": 10334
},
{
"epoch": 0.65,
"grad_norm": 0.8938761949539185,
"learning_rate": 2.8131662654848814e-06,
"loss": 0.5741,
"step": 10335
},
{
"epoch": 0.65,
"grad_norm": 0.8748285174369812,
"learning_rate": 2.8122436526186935e-06,
"loss": 0.6341,
"step": 10336
},
{
"epoch": 0.65,
"grad_norm": 0.8347454071044922,
"learning_rate": 2.8113211318713146e-06,
"loss": 0.6091,
"step": 10337
},
{
"epoch": 0.65,
"grad_norm": 0.8568246364593506,
"learning_rate": 2.810398703281589e-06,
"loss": 0.5535,
"step": 10338
},
{
"epoch": 0.66,
"grad_norm": 0.9079662561416626,
"learning_rate": 2.8094763668883567e-06,
"loss": 0.5603,
"step": 10339
},
{
"epoch": 0.66,
"grad_norm": 0.9075840711593628,
"learning_rate": 2.808554122730457e-06,
"loss": 0.5642,
"step": 10340
},
{
"epoch": 0.66,
"grad_norm": 0.8704594373703003,
"learning_rate": 2.8076319708467146e-06,
"loss": 0.5842,
"step": 10341
},
{
"epoch": 0.66,
"grad_norm": 0.9282211661338806,
"learning_rate": 2.8067099112759665e-06,
"loss": 0.5972,
"step": 10342
},
{
"epoch": 0.66,
"grad_norm": 0.8854076266288757,
"learning_rate": 2.8057879440570356e-06,
"loss": 0.5837,
"step": 10343
},
{
"epoch": 0.66,
"grad_norm": 0.8140289783477783,
"learning_rate": 2.804866069228739e-06,
"loss": 0.5405,
"step": 10344
},
{
"epoch": 0.66,
"grad_norm": 0.9335722923278809,
"learning_rate": 2.803944286829896e-06,
"loss": 0.5488,
"step": 10345
},
{
"epoch": 0.66,
"grad_norm": 0.8789125084877014,
"learning_rate": 2.8030225968993198e-06,
"loss": 0.5977,
"step": 10346
},
{
"epoch": 0.66,
"grad_norm": 0.9545979499816895,
"learning_rate": 2.802100999475819e-06,
"loss": 0.5622,
"step": 10347
},
{
"epoch": 0.66,
"grad_norm": 0.9649593830108643,
"learning_rate": 2.8011794945982013e-06,
"loss": 0.6229,
"step": 10348
},
{
"epoch": 0.66,
"grad_norm": 0.8558527827262878,
"learning_rate": 2.8002580823052638e-06,
"loss": 0.5659,
"step": 10349
},
{
"epoch": 0.66,
"grad_norm": 0.9221006631851196,
"learning_rate": 2.7993367626358047e-06,
"loss": 0.5421,
"step": 10350
},
{
"epoch": 0.66,
"grad_norm": 0.8340117335319519,
"learning_rate": 2.7984155356286224e-06,
"loss": 0.5119,
"step": 10351
},
{
"epoch": 0.66,
"grad_norm": 0.8941150903701782,
"learning_rate": 2.7974944013225013e-06,
"loss": 0.5676,
"step": 10352
},
{
"epoch": 0.66,
"grad_norm": 0.857522189617157,
"learning_rate": 2.796573359756229e-06,
"loss": 0.5856,
"step": 10353
},
{
"epoch": 0.66,
"grad_norm": 0.986824631690979,
"learning_rate": 2.7956524109685874e-06,
"loss": 0.6455,
"step": 10354
},
{
"epoch": 0.66,
"grad_norm": 0.8224316239356995,
"learning_rate": 2.7947315549983545e-06,
"loss": 0.584,
"step": 10355
},
{
"epoch": 0.66,
"grad_norm": 0.8816094994544983,
"learning_rate": 2.793810791884306e-06,
"loss": 0.5838,
"step": 10356
},
{
"epoch": 0.66,
"grad_norm": 0.8999599814414978,
"learning_rate": 2.792890121665208e-06,
"loss": 0.5797,
"step": 10357
},
{
"epoch": 0.66,
"grad_norm": 0.9199798703193665,
"learning_rate": 2.791969544379828e-06,
"loss": 0.606,
"step": 10358
},
{
"epoch": 0.66,
"grad_norm": 0.8767827153205872,
"learning_rate": 2.7910490600669327e-06,
"loss": 0.5771,
"step": 10359
},
{
"epoch": 0.66,
"grad_norm": 0.8857783675193787,
"learning_rate": 2.790128668765275e-06,
"loss": 0.6235,
"step": 10360
},
{
"epoch": 0.66,
"grad_norm": 0.873058021068573,
"learning_rate": 2.789208370513612e-06,
"loss": 0.5137,
"step": 10361
},
{
"epoch": 0.66,
"grad_norm": 0.9512156248092651,
"learning_rate": 2.7882881653506947e-06,
"loss": 0.6124,
"step": 10362
},
{
"epoch": 0.66,
"grad_norm": 0.8597283959388733,
"learning_rate": 2.787368053315266e-06,
"loss": 0.5774,
"step": 10363
},
{
"epoch": 0.66,
"grad_norm": 0.9025830626487732,
"learning_rate": 2.7864480344460743e-06,
"loss": 0.5343,
"step": 10364
},
{
"epoch": 0.66,
"grad_norm": 0.9169187545776367,
"learning_rate": 2.7855281087818543e-06,
"loss": 0.5986,
"step": 10365
},
{
"epoch": 0.66,
"grad_norm": 0.8351139426231384,
"learning_rate": 2.7846082763613412e-06,
"loss": 0.5107,
"step": 10366
},
{
"epoch": 0.66,
"grad_norm": 0.9500547647476196,
"learning_rate": 2.783688537223268e-06,
"loss": 0.6406,
"step": 10367
},
{
"epoch": 0.66,
"grad_norm": 0.9017059803009033,
"learning_rate": 2.7827688914063596e-06,
"loss": 0.6178,
"step": 10368
},
{
"epoch": 0.66,
"grad_norm": 0.9711951613426208,
"learning_rate": 2.78184933894934e-06,
"loss": 0.5575,
"step": 10369
},
{
"epoch": 0.66,
"grad_norm": 0.8450467586517334,
"learning_rate": 2.780929879890931e-06,
"loss": 0.5841,
"step": 10370
},
{
"epoch": 0.66,
"grad_norm": 0.9280916452407837,
"learning_rate": 2.780010514269841e-06,
"loss": 0.5718,
"step": 10371
},
{
"epoch": 0.66,
"grad_norm": 0.9153168201446533,
"learning_rate": 2.7790912421247883e-06,
"loss": 0.6029,
"step": 10372
},
{
"epoch": 0.66,
"grad_norm": 0.9410317540168762,
"learning_rate": 2.7781720634944766e-06,
"loss": 0.6092,
"step": 10373
},
{
"epoch": 0.66,
"grad_norm": 0.8701797723770142,
"learning_rate": 2.77725297841761e-06,
"loss": 0.5228,
"step": 10374
},
{
"epoch": 0.66,
"grad_norm": 0.8795192241668701,
"learning_rate": 2.7763339869328897e-06,
"loss": 0.5848,
"step": 10375
},
{
"epoch": 0.66,
"grad_norm": 0.920274555683136,
"learning_rate": 2.7754150890790067e-06,
"loss": 0.5968,
"step": 10376
},
{
"epoch": 0.66,
"grad_norm": 0.8954097032546997,
"learning_rate": 2.7744962848946565e-06,
"loss": 0.5599,
"step": 10377
},
{
"epoch": 0.66,
"grad_norm": 0.9171625375747681,
"learning_rate": 2.7735775744185276e-06,
"loss": 0.5803,
"step": 10378
},
{
"epoch": 0.66,
"grad_norm": 0.8371365666389465,
"learning_rate": 2.7726589576893004e-06,
"loss": 0.5397,
"step": 10379
},
{
"epoch": 0.66,
"grad_norm": 0.8804381489753723,
"learning_rate": 2.7717404347456567e-06,
"loss": 0.5273,
"step": 10380
},
{
"epoch": 0.66,
"grad_norm": 0.8363378643989563,
"learning_rate": 2.7708220056262706e-06,
"loss": 0.578,
"step": 10381
},
{
"epoch": 0.66,
"grad_norm": 0.9080025553703308,
"learning_rate": 2.7699036703698158e-06,
"loss": 0.5766,
"step": 10382
},
{
"epoch": 0.66,
"grad_norm": 0.9054446220397949,
"learning_rate": 2.7689854290149608e-06,
"loss": 0.6039,
"step": 10383
},
{
"epoch": 0.66,
"grad_norm": 0.8597883582115173,
"learning_rate": 2.768067281600365e-06,
"loss": 0.5887,
"step": 10384
},
{
"epoch": 0.66,
"grad_norm": 0.939932644367218,
"learning_rate": 2.7671492281646937e-06,
"loss": 0.5608,
"step": 10385
},
{
"epoch": 0.66,
"grad_norm": 0.8852954506874084,
"learning_rate": 2.7662312687466026e-06,
"loss": 0.5328,
"step": 10386
},
{
"epoch": 0.66,
"grad_norm": 0.8315883278846741,
"learning_rate": 2.7653134033847393e-06,
"loss": 0.5873,
"step": 10387
},
{
"epoch": 0.66,
"grad_norm": 0.8988177180290222,
"learning_rate": 2.7643956321177558e-06,
"loss": 0.5822,
"step": 10388
},
{
"epoch": 0.66,
"grad_norm": 0.8246173858642578,
"learning_rate": 2.763477954984295e-06,
"loss": 0.5949,
"step": 10389
},
{
"epoch": 0.66,
"grad_norm": 0.8828296065330505,
"learning_rate": 2.7625603720229964e-06,
"loss": 0.5875,
"step": 10390
},
{
"epoch": 0.66,
"grad_norm": 0.9308893084526062,
"learning_rate": 2.7616428832724983e-06,
"loss": 0.6503,
"step": 10391
},
{
"epoch": 0.66,
"grad_norm": 0.914340078830719,
"learning_rate": 2.760725488771433e-06,
"loss": 0.6379,
"step": 10392
},
{
"epoch": 0.66,
"grad_norm": 0.8978453874588013,
"learning_rate": 2.7598081885584237e-06,
"loss": 0.5879,
"step": 10393
},
{
"epoch": 0.66,
"grad_norm": 0.9084619283676147,
"learning_rate": 2.758890982672102e-06,
"loss": 0.5752,
"step": 10394
},
{
"epoch": 0.66,
"grad_norm": 0.8810911774635315,
"learning_rate": 2.757973871151083e-06,
"loss": 0.6372,
"step": 10395
},
{
"epoch": 0.66,
"grad_norm": 0.8374783992767334,
"learning_rate": 2.757056854033985e-06,
"loss": 0.5807,
"step": 10396
},
{
"epoch": 0.66,
"grad_norm": 0.9119501709938049,
"learning_rate": 2.7561399313594205e-06,
"loss": 0.586,
"step": 10397
},
{
"epoch": 0.66,
"grad_norm": 1.0351508855819702,
"learning_rate": 2.7552231031659972e-06,
"loss": 0.5944,
"step": 10398
},
{
"epoch": 0.66,
"grad_norm": 0.9276666045188904,
"learning_rate": 2.75430636949232e-06,
"loss": 0.6075,
"step": 10399
},
{
"epoch": 0.66,
"grad_norm": 0.851760745048523,
"learning_rate": 2.753389730376992e-06,
"loss": 0.5774,
"step": 10400
},
{
"epoch": 0.66,
"grad_norm": 0.8853036165237427,
"learning_rate": 2.752473185858603e-06,
"loss": 0.581,
"step": 10401
},
{
"epoch": 0.66,
"grad_norm": 0.9542864561080933,
"learning_rate": 2.7515567359757526e-06,
"loss": 0.6231,
"step": 10402
},
{
"epoch": 0.66,
"grad_norm": 0.8955079317092896,
"learning_rate": 2.750640380767025e-06,
"loss": 0.5894,
"step": 10403
},
{
"epoch": 0.66,
"grad_norm": 0.9134573340415955,
"learning_rate": 2.7497241202710056e-06,
"loss": 0.5966,
"step": 10404
},
{
"epoch": 0.66,
"grad_norm": 0.8695476651191711,
"learning_rate": 2.7488079545262757e-06,
"loss": 0.6017,
"step": 10405
},
{
"epoch": 0.66,
"grad_norm": 0.9271215796470642,
"learning_rate": 2.747891883571412e-06,
"loss": 0.6063,
"step": 10406
},
{
"epoch": 0.66,
"grad_norm": 0.8371964693069458,
"learning_rate": 2.746975907444986e-06,
"loss": 0.5274,
"step": 10407
},
{
"epoch": 0.66,
"grad_norm": 0.9001272320747375,
"learning_rate": 2.7460600261855687e-06,
"loss": 0.6252,
"step": 10408
},
{
"epoch": 0.66,
"grad_norm": 0.8996703028678894,
"learning_rate": 2.7451442398317206e-06,
"loss": 0.6346,
"step": 10409
},
{
"epoch": 0.66,
"grad_norm": 0.8631662130355835,
"learning_rate": 2.7442285484220055e-06,
"loss": 0.5648,
"step": 10410
},
{
"epoch": 0.66,
"grad_norm": 0.9067828059196472,
"learning_rate": 2.7433129519949784e-06,
"loss": 0.596,
"step": 10411
},
{
"epoch": 0.66,
"grad_norm": 0.8877487182617188,
"learning_rate": 2.742397450589193e-06,
"loss": 0.6091,
"step": 10412
},
{
"epoch": 0.66,
"grad_norm": 0.8369250893592834,
"learning_rate": 2.7414820442431976e-06,
"loss": 0.5867,
"step": 10413
},
{
"epoch": 0.66,
"grad_norm": 0.9432762265205383,
"learning_rate": 2.7405667329955344e-06,
"loss": 0.6184,
"step": 10414
},
{
"epoch": 0.66,
"grad_norm": 0.8785738348960876,
"learning_rate": 2.739651516884747e-06,
"loss": 0.5174,
"step": 10415
},
{
"epoch": 0.66,
"grad_norm": 0.8684585690498352,
"learning_rate": 2.7387363959493733e-06,
"loss": 0.5862,
"step": 10416
},
{
"epoch": 0.66,
"grad_norm": 0.8876842856407166,
"learning_rate": 2.737821370227942e-06,
"loss": 0.6136,
"step": 10417
},
{
"epoch": 0.66,
"grad_norm": 0.9569928050041199,
"learning_rate": 2.7369064397589828e-06,
"loss": 0.5912,
"step": 10418
},
{
"epoch": 0.66,
"grad_norm": 0.8708109259605408,
"learning_rate": 2.7359916045810207e-06,
"loss": 0.5547,
"step": 10419
},
{
"epoch": 0.66,
"grad_norm": 0.8798702359199524,
"learning_rate": 2.7350768647325766e-06,
"loss": 0.5619,
"step": 10420
},
{
"epoch": 0.66,
"grad_norm": 0.8539235591888428,
"learning_rate": 2.734162220252168e-06,
"loss": 0.5034,
"step": 10421
},
{
"epoch": 0.66,
"grad_norm": 0.9067310094833374,
"learning_rate": 2.7332476711783044e-06,
"loss": 0.6071,
"step": 10422
},
{
"epoch": 0.66,
"grad_norm": 0.8697945475578308,
"learning_rate": 2.732333217549494e-06,
"loss": 0.598,
"step": 10423
},
{
"epoch": 0.66,
"grad_norm": 0.8847575187683105,
"learning_rate": 2.7314188594042466e-06,
"loss": 0.5674,
"step": 10424
},
{
"epoch": 0.66,
"grad_norm": 0.9137183427810669,
"learning_rate": 2.7305045967810585e-06,
"loss": 0.5582,
"step": 10425
},
{
"epoch": 0.66,
"grad_norm": 0.8915376663208008,
"learning_rate": 2.7295904297184262e-06,
"loss": 0.5746,
"step": 10426
},
{
"epoch": 0.66,
"grad_norm": 0.8941647410392761,
"learning_rate": 2.7286763582548424e-06,
"loss": 0.5729,
"step": 10427
},
{
"epoch": 0.66,
"grad_norm": 0.9105641841888428,
"learning_rate": 2.7277623824287957e-06,
"loss": 0.5698,
"step": 10428
},
{
"epoch": 0.66,
"grad_norm": 0.8876394033432007,
"learning_rate": 2.726848502278773e-06,
"loss": 0.645,
"step": 10429
},
{
"epoch": 0.66,
"grad_norm": 0.9088033437728882,
"learning_rate": 2.7259347178432493e-06,
"loss": 0.5629,
"step": 10430
},
{
"epoch": 0.66,
"grad_norm": 0.910689115524292,
"learning_rate": 2.7250210291607026e-06,
"loss": 0.5819,
"step": 10431
},
{
"epoch": 0.66,
"grad_norm": 0.8679473996162415,
"learning_rate": 2.7241074362696108e-06,
"loss": 0.5183,
"step": 10432
},
{
"epoch": 0.66,
"grad_norm": 0.9289723634719849,
"learning_rate": 2.7231939392084347e-06,
"loss": 0.6181,
"step": 10433
},
{
"epoch": 0.66,
"grad_norm": 0.895182728767395,
"learning_rate": 2.7222805380156414e-06,
"loss": 0.6143,
"step": 10434
},
{
"epoch": 0.66,
"grad_norm": 0.8651720881462097,
"learning_rate": 2.7213672327296914e-06,
"loss": 0.58,
"step": 10435
},
{
"epoch": 0.66,
"grad_norm": 0.8684262037277222,
"learning_rate": 2.72045402338904e-06,
"loss": 0.5928,
"step": 10436
},
{
"epoch": 0.66,
"grad_norm": 0.9024814367294312,
"learning_rate": 2.719540910032142e-06,
"loss": 0.5643,
"step": 10437
},
{
"epoch": 0.66,
"grad_norm": 0.9180070161819458,
"learning_rate": 2.7186278926974406e-06,
"loss": 0.5972,
"step": 10438
},
{
"epoch": 0.66,
"grad_norm": 0.867103099822998,
"learning_rate": 2.717714971423383e-06,
"loss": 0.5539,
"step": 10439
},
{
"epoch": 0.66,
"grad_norm": 0.864017903804779,
"learning_rate": 2.7168021462484084e-06,
"loss": 0.5505,
"step": 10440
},
{
"epoch": 0.66,
"grad_norm": 0.8561496138572693,
"learning_rate": 2.715889417210953e-06,
"loss": 0.586,
"step": 10441
},
{
"epoch": 0.66,
"grad_norm": 0.874715268611908,
"learning_rate": 2.714976784349448e-06,
"loss": 0.5803,
"step": 10442
},
{
"epoch": 0.66,
"grad_norm": 0.9094971418380737,
"learning_rate": 2.7140642477023237e-06,
"loss": 0.5973,
"step": 10443
},
{
"epoch": 0.66,
"grad_norm": 0.9093654155731201,
"learning_rate": 2.7131518073079976e-06,
"loss": 0.6123,
"step": 10444
},
{
"epoch": 0.66,
"grad_norm": 0.8765634894371033,
"learning_rate": 2.7122394632048974e-06,
"loss": 0.5875,
"step": 10445
},
{
"epoch": 0.66,
"grad_norm": 0.9068828821182251,
"learning_rate": 2.7113272154314328e-06,
"loss": 0.6061,
"step": 10446
},
{
"epoch": 0.66,
"grad_norm": 0.8776718378067017,
"learning_rate": 2.710415064026018e-06,
"loss": 0.5145,
"step": 10447
},
{
"epoch": 0.66,
"grad_norm": 0.8980036377906799,
"learning_rate": 2.7095030090270596e-06,
"loss": 0.5571,
"step": 10448
},
{
"epoch": 0.66,
"grad_norm": 0.837546706199646,
"learning_rate": 2.7085910504729617e-06,
"loss": 0.6024,
"step": 10449
},
{
"epoch": 0.66,
"grad_norm": 0.8471895456314087,
"learning_rate": 2.7076791884021236e-06,
"loss": 0.5205,
"step": 10450
},
{
"epoch": 0.66,
"grad_norm": 0.882883608341217,
"learning_rate": 2.7067674228529417e-06,
"loss": 0.5888,
"step": 10451
},
{
"epoch": 0.66,
"grad_norm": 0.8597538471221924,
"learning_rate": 2.7058557538638026e-06,
"loss": 0.555,
"step": 10452
},
{
"epoch": 0.66,
"grad_norm": 0.8812461495399475,
"learning_rate": 2.7049441814731007e-06,
"loss": 0.5738,
"step": 10453
},
{
"epoch": 0.66,
"grad_norm": 0.8679195642471313,
"learning_rate": 2.704032705719214e-06,
"loss": 0.5855,
"step": 10454
},
{
"epoch": 0.66,
"grad_norm": 0.9121565818786621,
"learning_rate": 2.703121326640522e-06,
"loss": 0.6203,
"step": 10455
},
{
"epoch": 0.66,
"grad_norm": 0.8402708768844604,
"learning_rate": 2.702210044275401e-06,
"loss": 0.5068,
"step": 10456
},
{
"epoch": 0.66,
"grad_norm": 0.9229235053062439,
"learning_rate": 2.7012988586622224e-06,
"loss": 0.5719,
"step": 10457
},
{
"epoch": 0.66,
"grad_norm": 0.874308168888092,
"learning_rate": 2.7003877698393512e-06,
"loss": 0.5587,
"step": 10458
},
{
"epoch": 0.66,
"grad_norm": 0.8813081383705139,
"learning_rate": 2.6994767778451535e-06,
"loss": 0.5455,
"step": 10459
},
{
"epoch": 0.66,
"grad_norm": 0.8940520286560059,
"learning_rate": 2.6985658827179845e-06,
"loss": 0.5927,
"step": 10460
},
{
"epoch": 0.66,
"grad_norm": 0.8507505059242249,
"learning_rate": 2.6976550844961992e-06,
"loss": 0.5388,
"step": 10461
},
{
"epoch": 0.66,
"grad_norm": 0.9301406741142273,
"learning_rate": 2.6967443832181496e-06,
"loss": 0.57,
"step": 10462
},
{
"epoch": 0.66,
"grad_norm": 0.8438676595687866,
"learning_rate": 2.6958337789221813e-06,
"loss": 0.555,
"step": 10463
},
{
"epoch": 0.66,
"grad_norm": 0.9643988609313965,
"learning_rate": 2.694923271646637e-06,
"loss": 0.5663,
"step": 10464
},
{
"epoch": 0.66,
"grad_norm": 0.9135273098945618,
"learning_rate": 2.694012861429855e-06,
"loss": 0.603,
"step": 10465
},
{
"epoch": 0.66,
"grad_norm": 0.9375592470169067,
"learning_rate": 2.693102548310169e-06,
"loss": 0.5981,
"step": 10466
},
{
"epoch": 0.66,
"grad_norm": 0.8663008809089661,
"learning_rate": 2.6921923323259124e-06,
"loss": 0.5962,
"step": 10467
},
{
"epoch": 0.66,
"grad_norm": 0.9672373533248901,
"learning_rate": 2.691282213515406e-06,
"loss": 0.5845,
"step": 10468
},
{
"epoch": 0.66,
"grad_norm": 0.8220438957214355,
"learning_rate": 2.690372191916974e-06,
"loss": 0.57,
"step": 10469
},
{
"epoch": 0.66,
"grad_norm": 0.905386745929718,
"learning_rate": 2.6894622675689345e-06,
"loss": 0.6044,
"step": 10470
},
{
"epoch": 0.66,
"grad_norm": 0.860525906085968,
"learning_rate": 2.6885524405096007e-06,
"loss": 0.6008,
"step": 10471
},
{
"epoch": 0.66,
"grad_norm": 0.9347862005233765,
"learning_rate": 2.687642710777284e-06,
"loss": 0.6183,
"step": 10472
},
{
"epoch": 0.66,
"grad_norm": 0.8891615867614746,
"learning_rate": 2.6867330784102896e-06,
"loss": 0.5547,
"step": 10473
},
{
"epoch": 0.66,
"grad_norm": 0.9229059815406799,
"learning_rate": 2.6858235434469138e-06,
"loss": 0.6138,
"step": 10474
},
{
"epoch": 0.66,
"grad_norm": 0.8102059364318848,
"learning_rate": 2.684914105925463e-06,
"loss": 0.5539,
"step": 10475
},
{
"epoch": 0.66,
"grad_norm": 0.8751254081726074,
"learning_rate": 2.6840047658842226e-06,
"loss": 0.5189,
"step": 10476
},
{
"epoch": 0.66,
"grad_norm": 0.9399062991142273,
"learning_rate": 2.683095523361486e-06,
"loss": 0.6127,
"step": 10477
},
{
"epoch": 0.66,
"grad_norm": 0.9292119145393372,
"learning_rate": 2.682186378395536e-06,
"loss": 0.6257,
"step": 10478
},
{
"epoch": 0.66,
"grad_norm": 0.9873320460319519,
"learning_rate": 2.6812773310246547e-06,
"loss": 0.5942,
"step": 10479
},
{
"epoch": 0.66,
"grad_norm": 0.8316569328308105,
"learning_rate": 2.680368381287119e-06,
"loss": 0.5663,
"step": 10480
},
{
"epoch": 0.66,
"grad_norm": 0.893159031867981,
"learning_rate": 2.6794595292212035e-06,
"loss": 0.5561,
"step": 10481
},
{
"epoch": 0.66,
"grad_norm": 0.905292272567749,
"learning_rate": 2.67855077486517e-06,
"loss": 0.5821,
"step": 10482
},
{
"epoch": 0.66,
"grad_norm": 0.9160034656524658,
"learning_rate": 2.677642118257292e-06,
"loss": 0.6112,
"step": 10483
},
{
"epoch": 0.66,
"grad_norm": 0.88798987865448,
"learning_rate": 2.6767335594358234e-06,
"loss": 0.6043,
"step": 10484
},
{
"epoch": 0.66,
"grad_norm": 0.8431712985038757,
"learning_rate": 2.675825098439023e-06,
"loss": 0.5438,
"step": 10485
},
{
"epoch": 0.66,
"grad_norm": 0.9120664596557617,
"learning_rate": 2.6749167353051443e-06,
"loss": 0.6029,
"step": 10486
},
{
"epoch": 0.66,
"grad_norm": 0.8885997533798218,
"learning_rate": 2.674008470072429e-06,
"loss": 0.5532,
"step": 10487
},
{
"epoch": 0.66,
"grad_norm": 0.9783884286880493,
"learning_rate": 2.673100302779128e-06,
"loss": 0.5793,
"step": 10488
},
{
"epoch": 0.66,
"grad_norm": 0.8485262393951416,
"learning_rate": 2.6721922334634804e-06,
"loss": 0.5563,
"step": 10489
},
{
"epoch": 0.66,
"grad_norm": 0.896809458732605,
"learning_rate": 2.671284262163718e-06,
"loss": 0.5856,
"step": 10490
},
{
"epoch": 0.66,
"grad_norm": 0.8948637843132019,
"learning_rate": 2.6703763889180746e-06,
"loss": 0.5547,
"step": 10491
},
{
"epoch": 0.66,
"grad_norm": 0.8857586979866028,
"learning_rate": 2.6694686137647767e-06,
"loss": 0.5937,
"step": 10492
},
{
"epoch": 0.66,
"grad_norm": 0.8958655595779419,
"learning_rate": 2.668560936742048e-06,
"loss": 0.5439,
"step": 10493
},
{
"epoch": 0.66,
"grad_norm": 0.8610227704048157,
"learning_rate": 2.6676533578881102e-06,
"loss": 0.5449,
"step": 10494
},
{
"epoch": 0.66,
"grad_norm": 0.8370438814163208,
"learning_rate": 2.6667458772411724e-06,
"loss": 0.5593,
"step": 10495
},
{
"epoch": 0.66,
"grad_norm": 0.886195182800293,
"learning_rate": 2.66583849483945e-06,
"loss": 0.6025,
"step": 10496
},
{
"epoch": 0.67,
"grad_norm": 0.8641106486320496,
"learning_rate": 2.664931210721151e-06,
"loss": 0.5801,
"step": 10497
},
{
"epoch": 0.67,
"grad_norm": 0.8426538109779358,
"learning_rate": 2.6640240249244744e-06,
"loss": 0.5569,
"step": 10498
},
{
"epoch": 0.67,
"grad_norm": 0.8817174434661865,
"learning_rate": 2.6631169374876185e-06,
"loss": 0.562,
"step": 10499
},
{
"epoch": 0.67,
"grad_norm": 0.8603051900863647,
"learning_rate": 2.6622099484487794e-06,
"loss": 0.5917,
"step": 10500
},
{
"epoch": 0.67,
"grad_norm": 0.9094916582107544,
"learning_rate": 2.6613030578461476e-06,
"loss": 0.5641,
"step": 10501
},
{
"epoch": 0.67,
"grad_norm": 0.8471028804779053,
"learning_rate": 2.6603962657179094e-06,
"loss": 0.5664,
"step": 10502
},
{
"epoch": 0.67,
"grad_norm": 0.9582904577255249,
"learning_rate": 2.6594895721022436e-06,
"loss": 0.651,
"step": 10503
},
{
"epoch": 0.67,
"grad_norm": 0.8720226287841797,
"learning_rate": 2.6585829770373286e-06,
"loss": 0.5729,
"step": 10504
},
{
"epoch": 0.67,
"grad_norm": 1.0530695915222168,
"learning_rate": 2.657676480561342e-06,
"loss": 0.6192,
"step": 10505
},
{
"epoch": 0.67,
"grad_norm": 0.8338209986686707,
"learning_rate": 2.6567700827124494e-06,
"loss": 0.5738,
"step": 10506
},
{
"epoch": 0.67,
"grad_norm": 0.9002853631973267,
"learning_rate": 2.655863783528817e-06,
"loss": 0.5746,
"step": 10507
},
{
"epoch": 0.67,
"grad_norm": 0.9461910128593445,
"learning_rate": 2.6549575830486053e-06,
"loss": 0.6294,
"step": 10508
},
{
"epoch": 0.67,
"grad_norm": 0.883553683757782,
"learning_rate": 2.6540514813099728e-06,
"loss": 0.6029,
"step": 10509
},
{
"epoch": 0.67,
"grad_norm": 0.9209686517715454,
"learning_rate": 2.6531454783510736e-06,
"loss": 0.6247,
"step": 10510
},
{
"epoch": 0.67,
"grad_norm": 0.9430029988288879,
"learning_rate": 2.6522395742100514e-06,
"loss": 0.6145,
"step": 10511
},
{
"epoch": 0.67,
"grad_norm": 0.8918984532356262,
"learning_rate": 2.651333768925052e-06,
"loss": 0.5791,
"step": 10512
},
{
"epoch": 0.67,
"grad_norm": 0.9535161256790161,
"learning_rate": 2.6504280625342203e-06,
"loss": 0.6567,
"step": 10513
},
{
"epoch": 0.67,
"grad_norm": 0.8918493390083313,
"learning_rate": 2.6495224550756888e-06,
"loss": 0.6135,
"step": 10514
},
{
"epoch": 0.67,
"grad_norm": 0.8543890118598938,
"learning_rate": 2.6486169465875887e-06,
"loss": 0.5809,
"step": 10515
},
{
"epoch": 0.67,
"grad_norm": 0.9117350578308105,
"learning_rate": 2.647711537108052e-06,
"loss": 0.5975,
"step": 10516
},
{
"epoch": 0.67,
"grad_norm": 0.9220753908157349,
"learning_rate": 2.6468062266751955e-06,
"loss": 0.5826,
"step": 10517
},
{
"epoch": 0.67,
"grad_norm": 0.8443688750267029,
"learning_rate": 2.6459010153271456e-06,
"loss": 0.5437,
"step": 10518
},
{
"epoch": 0.67,
"grad_norm": 0.9096937775611877,
"learning_rate": 2.6449959031020134e-06,
"loss": 0.5755,
"step": 10519
},
{
"epoch": 0.67,
"grad_norm": 0.9416838884353638,
"learning_rate": 2.6440908900379115e-06,
"loss": 0.5631,
"step": 10520
},
{
"epoch": 0.67,
"grad_norm": 0.855556309223175,
"learning_rate": 2.6431859761729462e-06,
"loss": 0.596,
"step": 10521
},
{
"epoch": 0.67,
"grad_norm": 0.8807106018066406,
"learning_rate": 2.6422811615452205e-06,
"loss": 0.5545,
"step": 10522
},
{
"epoch": 0.67,
"grad_norm": 0.8947232365608215,
"learning_rate": 2.6413764461928335e-06,
"loss": 0.5682,
"step": 10523
},
{
"epoch": 0.67,
"grad_norm": 0.7953035235404968,
"learning_rate": 2.6404718301538814e-06,
"loss": 0.6003,
"step": 10524
},
{
"epoch": 0.67,
"grad_norm": 0.880653977394104,
"learning_rate": 2.639567313466448e-06,
"loss": 0.5458,
"step": 10525
},
{
"epoch": 0.67,
"grad_norm": 0.8574607968330383,
"learning_rate": 2.6386628961686277e-06,
"loss": 0.5645,
"step": 10526
},
{
"epoch": 0.67,
"grad_norm": 0.9444485306739807,
"learning_rate": 2.6377585782984972e-06,
"loss": 0.572,
"step": 10527
},
{
"epoch": 0.67,
"grad_norm": 0.942674994468689,
"learning_rate": 2.636854359894134e-06,
"loss": 0.5551,
"step": 10528
},
{
"epoch": 0.67,
"grad_norm": 0.8828451633453369,
"learning_rate": 2.635950240993614e-06,
"loss": 0.6088,
"step": 10529
},
{
"epoch": 0.67,
"grad_norm": 0.9297851920127869,
"learning_rate": 2.635046221635005e-06,
"loss": 0.6244,
"step": 10530
},
{
"epoch": 0.67,
"grad_norm": 0.8250426054000854,
"learning_rate": 2.6341423018563727e-06,
"loss": 0.5795,
"step": 10531
},
{
"epoch": 0.67,
"grad_norm": 0.8772184252738953,
"learning_rate": 2.633238481695779e-06,
"loss": 0.5588,
"step": 10532
},
{
"epoch": 0.67,
"grad_norm": 0.8727168440818787,
"learning_rate": 2.6323347611912786e-06,
"loss": 0.5697,
"step": 10533
},
{
"epoch": 0.67,
"grad_norm": 0.9432665705680847,
"learning_rate": 2.6314311403809224e-06,
"loss": 0.6384,
"step": 10534
},
{
"epoch": 0.67,
"grad_norm": 0.8159708380699158,
"learning_rate": 2.630527619302765e-06,
"loss": 0.5078,
"step": 10535
},
{
"epoch": 0.67,
"grad_norm": 0.8695153594017029,
"learning_rate": 2.6296241979948455e-06,
"loss": 0.5961,
"step": 10536
},
{
"epoch": 0.67,
"grad_norm": 0.9193745255470276,
"learning_rate": 2.6287208764952045e-06,
"loss": 0.6085,
"step": 10537
},
{
"epoch": 0.67,
"grad_norm": 0.8767115473747253,
"learning_rate": 2.6278176548418783e-06,
"loss": 0.5908,
"step": 10538
},
{
"epoch": 0.67,
"grad_norm": 0.9035547971725464,
"learning_rate": 2.6269145330728985e-06,
"loss": 0.5646,
"step": 10539
},
{
"epoch": 0.67,
"grad_norm": 0.8972700238227844,
"learning_rate": 2.626011511226294e-06,
"loss": 0.5871,
"step": 10540
},
{
"epoch": 0.67,
"grad_norm": 0.9117726683616638,
"learning_rate": 2.625108589340085e-06,
"loss": 0.5854,
"step": 10541
},
{
"epoch": 0.67,
"grad_norm": 0.8025404810905457,
"learning_rate": 2.624205767452289e-06,
"loss": 0.5042,
"step": 10542
},
{
"epoch": 0.67,
"grad_norm": 0.9173393249511719,
"learning_rate": 2.623303045600928e-06,
"loss": 0.527,
"step": 10543
},
{
"epoch": 0.67,
"grad_norm": 0.9533704519271851,
"learning_rate": 2.622400423824005e-06,
"loss": 0.6099,
"step": 10544
},
{
"epoch": 0.67,
"grad_norm": 0.9197595119476318,
"learning_rate": 2.62149790215953e-06,
"loss": 0.5371,
"step": 10545
},
{
"epoch": 0.67,
"grad_norm": 0.8468542098999023,
"learning_rate": 2.6205954806455057e-06,
"loss": 0.5773,
"step": 10546
},
{
"epoch": 0.67,
"grad_norm": 0.8749061226844788,
"learning_rate": 2.6196931593199247e-06,
"loss": 0.5984,
"step": 10547
},
{
"epoch": 0.67,
"grad_norm": 0.9503610134124756,
"learning_rate": 2.618790938220788e-06,
"loss": 0.5798,
"step": 10548
},
{
"epoch": 0.67,
"grad_norm": 0.8924551010131836,
"learning_rate": 2.617888817386079e-06,
"loss": 0.5438,
"step": 10549
},
{
"epoch": 0.67,
"grad_norm": 0.9542580246925354,
"learning_rate": 2.6169867968537856e-06,
"loss": 0.5912,
"step": 10550
},
{
"epoch": 0.67,
"grad_norm": 0.8375207781791687,
"learning_rate": 2.616084876661888e-06,
"loss": 0.5299,
"step": 10551
},
{
"epoch": 0.67,
"grad_norm": 0.8957962989807129,
"learning_rate": 2.6151830568483627e-06,
"loss": 0.5979,
"step": 10552
},
{
"epoch": 0.67,
"grad_norm": 0.9477977156639099,
"learning_rate": 2.614281337451183e-06,
"loss": 0.5815,
"step": 10553
},
{
"epoch": 0.67,
"grad_norm": 0.8835856914520264,
"learning_rate": 2.61337971850832e-06,
"loss": 0.6355,
"step": 10554
},
{
"epoch": 0.67,
"grad_norm": 0.8357982039451599,
"learning_rate": 2.6124782000577296e-06,
"loss": 0.5298,
"step": 10555
},
{
"epoch": 0.67,
"grad_norm": 0.8705008625984192,
"learning_rate": 2.6115767821373807e-06,
"loss": 0.5278,
"step": 10556
},
{
"epoch": 0.67,
"grad_norm": 0.8560452461242676,
"learning_rate": 2.610675464785223e-06,
"loss": 0.6022,
"step": 10557
},
{
"epoch": 0.67,
"grad_norm": 0.8124215006828308,
"learning_rate": 2.6097742480392097e-06,
"loss": 0.5591,
"step": 10558
},
{
"epoch": 0.67,
"grad_norm": 0.865249752998352,
"learning_rate": 2.6088731319372874e-06,
"loss": 0.5972,
"step": 10559
},
{
"epoch": 0.67,
"grad_norm": 0.9022778868675232,
"learning_rate": 2.6079721165173994e-06,
"loss": 0.5793,
"step": 10560
},
{
"epoch": 0.67,
"grad_norm": 0.9692482352256775,
"learning_rate": 2.6070712018174847e-06,
"loss": 0.5374,
"step": 10561
},
{
"epoch": 0.67,
"grad_norm": 0.8842456340789795,
"learning_rate": 2.6061703878754784e-06,
"loss": 0.592,
"step": 10562
},
{
"epoch": 0.67,
"grad_norm": 0.8886352777481079,
"learning_rate": 2.6052696747293087e-06,
"loss": 0.5585,
"step": 10563
},
{
"epoch": 0.67,
"grad_norm": 0.9306212663650513,
"learning_rate": 2.6043690624169014e-06,
"loss": 0.6042,
"step": 10564
},
{
"epoch": 0.67,
"grad_norm": 0.9206665754318237,
"learning_rate": 2.6034685509761803e-06,
"loss": 0.5784,
"step": 10565
},
{
"epoch": 0.67,
"grad_norm": 0.9081207513809204,
"learning_rate": 2.602568140445061e-06,
"loss": 0.5858,
"step": 10566
},
{
"epoch": 0.67,
"grad_norm": 0.9279916882514954,
"learning_rate": 2.6016678308614583e-06,
"loss": 0.5536,
"step": 10567
},
{
"epoch": 0.67,
"grad_norm": 0.8986056447029114,
"learning_rate": 2.600767622263277e-06,
"loss": 0.5619,
"step": 10568
},
{
"epoch": 0.67,
"grad_norm": 0.9222875833511353,
"learning_rate": 2.599867514688427e-06,
"loss": 0.587,
"step": 10569
},
{
"epoch": 0.67,
"grad_norm": 0.880499005317688,
"learning_rate": 2.598967508174808e-06,
"loss": 0.5327,
"step": 10570
},
{
"epoch": 0.67,
"grad_norm": 0.8843125700950623,
"learning_rate": 2.598067602760313e-06,
"loss": 0.5566,
"step": 10571
},
{
"epoch": 0.67,
"grad_norm": 0.8370311856269836,
"learning_rate": 2.597167798482835e-06,
"loss": 0.5875,
"step": 10572
},
{
"epoch": 0.67,
"grad_norm": 0.9187718033790588,
"learning_rate": 2.596268095380263e-06,
"loss": 0.5995,
"step": 10573
},
{
"epoch": 0.67,
"grad_norm": 0.8443053364753723,
"learning_rate": 2.5953684934904788e-06,
"loss": 0.5484,
"step": 10574
},
{
"epoch": 0.67,
"grad_norm": 0.8494760990142822,
"learning_rate": 2.5944689928513643e-06,
"loss": 0.5316,
"step": 10575
},
{
"epoch": 0.67,
"grad_norm": 0.9031586647033691,
"learning_rate": 2.593569593500789e-06,
"loss": 0.5691,
"step": 10576
},
{
"epoch": 0.67,
"grad_norm": 0.8875634074211121,
"learning_rate": 2.592670295476628e-06,
"loss": 0.5378,
"step": 10577
},
{
"epoch": 0.67,
"grad_norm": 0.8447946310043335,
"learning_rate": 2.591771098816749e-06,
"loss": 0.6219,
"step": 10578
},
{
"epoch": 0.67,
"grad_norm": 0.8940092921257019,
"learning_rate": 2.5908720035590085e-06,
"loss": 0.5784,
"step": 10579
},
{
"epoch": 0.67,
"grad_norm": 0.8719146251678467,
"learning_rate": 2.5899730097412678e-06,
"loss": 0.5754,
"step": 10580
},
{
"epoch": 0.67,
"grad_norm": 0.8950543403625488,
"learning_rate": 2.58907411740138e-06,
"loss": 0.5846,
"step": 10581
},
{
"epoch": 0.67,
"grad_norm": 0.8580577373504639,
"learning_rate": 2.5881753265771938e-06,
"loss": 0.5897,
"step": 10582
},
{
"epoch": 0.67,
"grad_norm": 0.8067103028297424,
"learning_rate": 2.587276637306556e-06,
"loss": 0.5128,
"step": 10583
},
{
"epoch": 0.67,
"grad_norm": 0.8843825459480286,
"learning_rate": 2.586378049627304e-06,
"loss": 0.5823,
"step": 10584
},
{
"epoch": 0.67,
"grad_norm": 0.8826472759246826,
"learning_rate": 2.5854795635772743e-06,
"loss": 0.5843,
"step": 10585
},
{
"epoch": 0.67,
"grad_norm": 0.8553101420402527,
"learning_rate": 2.584581179194304e-06,
"loss": 0.5796,
"step": 10586
},
{
"epoch": 0.67,
"grad_norm": 0.8672010898590088,
"learning_rate": 2.5836828965162167e-06,
"loss": 0.5931,
"step": 10587
},
{
"epoch": 0.67,
"grad_norm": 0.927105188369751,
"learning_rate": 2.582784715580836e-06,
"loss": 0.5827,
"step": 10588
},
{
"epoch": 0.67,
"grad_norm": 0.8936177492141724,
"learning_rate": 2.581886636425983e-06,
"loss": 0.5892,
"step": 10589
},
{
"epoch": 0.67,
"grad_norm": 0.9585930109024048,
"learning_rate": 2.580988659089471e-06,
"loss": 0.6104,
"step": 10590
},
{
"epoch": 0.67,
"grad_norm": 0.957203209400177,
"learning_rate": 2.580090783609114e-06,
"loss": 0.6265,
"step": 10591
},
{
"epoch": 0.67,
"grad_norm": 0.8555622696876526,
"learning_rate": 2.5791930100227133e-06,
"loss": 0.5533,
"step": 10592
},
{
"epoch": 0.67,
"grad_norm": 0.8494757413864136,
"learning_rate": 2.5782953383680733e-06,
"loss": 0.5352,
"step": 10593
},
{
"epoch": 0.67,
"grad_norm": 0.8753517270088196,
"learning_rate": 2.5773977686829928e-06,
"loss": 0.5861,
"step": 10594
},
{
"epoch": 0.67,
"grad_norm": 0.8782363533973694,
"learning_rate": 2.5765003010052643e-06,
"loss": 0.55,
"step": 10595
},
{
"epoch": 0.67,
"grad_norm": 0.9527836441993713,
"learning_rate": 2.5756029353726777e-06,
"loss": 0.5831,
"step": 10596
},
{
"epoch": 0.67,
"grad_norm": 0.9011462926864624,
"learning_rate": 2.574705671823019e-06,
"loss": 0.6172,
"step": 10597
},
{
"epoch": 0.67,
"grad_norm": 0.935152530670166,
"learning_rate": 2.5738085103940634e-06,
"loss": 0.6276,
"step": 10598
},
{
"epoch": 0.67,
"grad_norm": 0.8869521021842957,
"learning_rate": 2.572911451123594e-06,
"loss": 0.6076,
"step": 10599
},
{
"epoch": 0.67,
"grad_norm": 0.918903648853302,
"learning_rate": 2.572014494049382e-06,
"loss": 0.5971,
"step": 10600
},
{
"epoch": 0.67,
"grad_norm": 0.8759111166000366,
"learning_rate": 2.571117639209191e-06,
"loss": 0.6048,
"step": 10601
},
{
"epoch": 0.67,
"grad_norm": 0.8874173164367676,
"learning_rate": 2.5702208866407873e-06,
"loss": 0.5135,
"step": 10602
},
{
"epoch": 0.67,
"grad_norm": 0.8090372681617737,
"learning_rate": 2.5693242363819292e-06,
"loss": 0.5409,
"step": 10603
},
{
"epoch": 0.67,
"grad_norm": 0.8543607592582703,
"learning_rate": 2.5684276884703717e-06,
"loss": 0.5645,
"step": 10604
},
{
"epoch": 0.67,
"grad_norm": 0.9561940431594849,
"learning_rate": 2.567531242943867e-06,
"loss": 0.6001,
"step": 10605
},
{
"epoch": 0.67,
"grad_norm": 0.9354879260063171,
"learning_rate": 2.5666348998401565e-06,
"loss": 0.5695,
"step": 10606
},
{
"epoch": 0.67,
"grad_norm": 0.8753422498703003,
"learning_rate": 2.565738659196987e-06,
"loss": 0.556,
"step": 10607
},
{
"epoch": 0.67,
"grad_norm": 0.9323849678039551,
"learning_rate": 2.5648425210520967e-06,
"loss": 0.6462,
"step": 10608
},
{
"epoch": 0.67,
"grad_norm": 0.8231973052024841,
"learning_rate": 2.563946485443214e-06,
"loss": 0.5039,
"step": 10609
},
{
"epoch": 0.67,
"grad_norm": 0.8657350540161133,
"learning_rate": 2.5630505524080707e-06,
"loss": 0.5553,
"step": 10610
},
{
"epoch": 0.67,
"grad_norm": 0.8768414258956909,
"learning_rate": 2.5621547219843905e-06,
"loss": 0.5786,
"step": 10611
},
{
"epoch": 0.67,
"grad_norm": 0.9179761409759521,
"learning_rate": 2.5612589942098952e-06,
"loss": 0.5519,
"step": 10612
},
{
"epoch": 0.67,
"grad_norm": 0.8903763294219971,
"learning_rate": 2.560363369122301e-06,
"loss": 0.6134,
"step": 10613
},
{
"epoch": 0.67,
"grad_norm": 0.9475022554397583,
"learning_rate": 2.559467846759317e-06,
"loss": 0.5948,
"step": 10614
},
{
"epoch": 0.67,
"grad_norm": 0.9176366329193115,
"learning_rate": 2.5585724271586505e-06,
"loss": 0.5935,
"step": 10615
},
{
"epoch": 0.67,
"grad_norm": 0.9265202283859253,
"learning_rate": 2.557677110358009e-06,
"loss": 0.5969,
"step": 10616
},
{
"epoch": 0.67,
"grad_norm": 0.8886149525642395,
"learning_rate": 2.556781896395087e-06,
"loss": 0.571,
"step": 10617
},
{
"epoch": 0.67,
"grad_norm": 0.942081093788147,
"learning_rate": 2.55588678530758e-06,
"loss": 0.5508,
"step": 10618
},
{
"epoch": 0.67,
"grad_norm": 0.9063771367073059,
"learning_rate": 2.5549917771331767e-06,
"loss": 0.5414,
"step": 10619
},
{
"epoch": 0.67,
"grad_norm": 0.8666827082633972,
"learning_rate": 2.5540968719095656e-06,
"loss": 0.5374,
"step": 10620
},
{
"epoch": 0.67,
"grad_norm": 0.9030107259750366,
"learning_rate": 2.5532020696744277e-06,
"loss": 0.5554,
"step": 10621
},
{
"epoch": 0.67,
"grad_norm": 0.9230242967605591,
"learning_rate": 2.5523073704654374e-06,
"loss": 0.6067,
"step": 10622
},
{
"epoch": 0.67,
"grad_norm": 0.9134321808815002,
"learning_rate": 2.5514127743202668e-06,
"loss": 0.5651,
"step": 10623
},
{
"epoch": 0.67,
"grad_norm": 0.8783094882965088,
"learning_rate": 2.5505182812765894e-06,
"loss": 0.5925,
"step": 10624
},
{
"epoch": 0.67,
"grad_norm": 0.9257774949073792,
"learning_rate": 2.549623891372065e-06,
"loss": 0.6196,
"step": 10625
},
{
"epoch": 0.67,
"grad_norm": 0.924429714679718,
"learning_rate": 2.5487296046443537e-06,
"loss": 0.5452,
"step": 10626
},
{
"epoch": 0.67,
"grad_norm": 0.9063295125961304,
"learning_rate": 2.547835421131114e-06,
"loss": 0.6303,
"step": 10627
},
{
"epoch": 0.67,
"grad_norm": 0.8978242874145508,
"learning_rate": 2.5469413408699894e-06,
"loss": 0.5459,
"step": 10628
},
{
"epoch": 0.67,
"grad_norm": 0.9666255712509155,
"learning_rate": 2.546047363898636e-06,
"loss": 0.6131,
"step": 10629
},
{
"epoch": 0.67,
"grad_norm": 0.9179185032844543,
"learning_rate": 2.545153490254689e-06,
"loss": 0.6264,
"step": 10630
},
{
"epoch": 0.67,
"grad_norm": 0.8218669295310974,
"learning_rate": 2.5442597199757896e-06,
"loss": 0.5331,
"step": 10631
},
{
"epoch": 0.67,
"grad_norm": 0.8918872475624084,
"learning_rate": 2.5433660530995696e-06,
"loss": 0.6075,
"step": 10632
},
{
"epoch": 0.67,
"grad_norm": 0.8965834379196167,
"learning_rate": 2.54247248966366e-06,
"loss": 0.5411,
"step": 10633
},
{
"epoch": 0.67,
"grad_norm": 0.9281273484230042,
"learning_rate": 2.5415790297056843e-06,
"loss": 0.545,
"step": 10634
},
{
"epoch": 0.67,
"grad_norm": 0.882722795009613,
"learning_rate": 2.5406856732632647e-06,
"loss": 0.6063,
"step": 10635
},
{
"epoch": 0.67,
"grad_norm": 0.8930636048316956,
"learning_rate": 2.539792420374013e-06,
"loss": 0.6173,
"step": 10636
},
{
"epoch": 0.67,
"grad_norm": 0.8792672753334045,
"learning_rate": 2.5388992710755477e-06,
"loss": 0.5656,
"step": 10637
},
{
"epoch": 0.67,
"grad_norm": 0.8245922327041626,
"learning_rate": 2.5380062254054706e-06,
"loss": 0.5317,
"step": 10638
},
{
"epoch": 0.67,
"grad_norm": 0.964028537273407,
"learning_rate": 2.5371132834013867e-06,
"loss": 0.5868,
"step": 10639
},
{
"epoch": 0.67,
"grad_norm": 0.8802077770233154,
"learning_rate": 2.5362204451008963e-06,
"loss": 0.5438,
"step": 10640
},
{
"epoch": 0.67,
"grad_norm": 0.8937103152275085,
"learning_rate": 2.5353277105415887e-06,
"loss": 0.6146,
"step": 10641
},
{
"epoch": 0.67,
"grad_norm": 1.144944667816162,
"learning_rate": 2.5344350797610597e-06,
"loss": 0.5291,
"step": 10642
},
{
"epoch": 0.67,
"grad_norm": 0.8854457139968872,
"learning_rate": 2.533542552796893e-06,
"loss": 0.5663,
"step": 10643
},
{
"epoch": 0.67,
"grad_norm": 0.9482905864715576,
"learning_rate": 2.5326501296866677e-06,
"loss": 0.5978,
"step": 10644
},
{
"epoch": 0.67,
"grad_norm": 0.8801560401916504,
"learning_rate": 2.531757810467963e-06,
"loss": 0.5489,
"step": 10645
},
{
"epoch": 0.67,
"grad_norm": 0.8408117890357971,
"learning_rate": 2.53086559517835e-06,
"loss": 0.5729,
"step": 10646
},
{
"epoch": 0.67,
"grad_norm": 0.8972226977348328,
"learning_rate": 2.529973483855397e-06,
"loss": 0.507,
"step": 10647
},
{
"epoch": 0.67,
"grad_norm": 0.8918501734733582,
"learning_rate": 2.52908147653667e-06,
"loss": 0.5221,
"step": 10648
},
{
"epoch": 0.67,
"grad_norm": 0.8490516543388367,
"learning_rate": 2.5281895732597227e-06,
"loss": 0.5688,
"step": 10649
},
{
"epoch": 0.67,
"grad_norm": 0.9360247850418091,
"learning_rate": 2.527297774062115e-06,
"loss": 0.5614,
"step": 10650
},
{
"epoch": 0.67,
"grad_norm": 0.8709716796875,
"learning_rate": 2.5264060789813994e-06,
"loss": 0.5421,
"step": 10651
},
{
"epoch": 0.67,
"grad_norm": 0.8740971088409424,
"learning_rate": 2.525514488055116e-06,
"loss": 0.5832,
"step": 10652
},
{
"epoch": 0.67,
"grad_norm": 0.9189413785934448,
"learning_rate": 2.5246230013208093e-06,
"loss": 0.6433,
"step": 10653
},
{
"epoch": 0.67,
"grad_norm": 0.7839402556419373,
"learning_rate": 2.5237316188160165e-06,
"loss": 0.4975,
"step": 10654
},
{
"epoch": 0.68,
"grad_norm": 0.9069191217422485,
"learning_rate": 2.522840340578272e-06,
"loss": 0.5909,
"step": 10655
},
{
"epoch": 0.68,
"grad_norm": 0.9287664294242859,
"learning_rate": 2.521949166645102e-06,
"loss": 0.6464,
"step": 10656
},
{
"epoch": 0.68,
"grad_norm": 0.8806740045547485,
"learning_rate": 2.5210580970540354e-06,
"loss": 0.5462,
"step": 10657
},
{
"epoch": 0.68,
"grad_norm": 0.8837103247642517,
"learning_rate": 2.5201671318425834e-06,
"loss": 0.5808,
"step": 10658
},
{
"epoch": 0.68,
"grad_norm": 0.9099284410476685,
"learning_rate": 2.519276271048272e-06,
"loss": 0.5988,
"step": 10659
},
{
"epoch": 0.68,
"grad_norm": 0.9354240894317627,
"learning_rate": 2.5183855147086045e-06,
"loss": 0.6039,
"step": 10660
},
{
"epoch": 0.68,
"grad_norm": 0.8624934554100037,
"learning_rate": 2.51749486286109e-06,
"loss": 0.5186,
"step": 10661
},
{
"epoch": 0.68,
"grad_norm": 0.8302717208862305,
"learning_rate": 2.516604315543231e-06,
"loss": 0.578,
"step": 10662
},
{
"epoch": 0.68,
"grad_norm": 0.9564114212989807,
"learning_rate": 2.515713872792525e-06,
"loss": 0.6205,
"step": 10663
},
{
"epoch": 0.68,
"grad_norm": 0.9036477208137512,
"learning_rate": 2.5148235346464654e-06,
"loss": 0.5378,
"step": 10664
},
{
"epoch": 0.68,
"grad_norm": 0.850906252861023,
"learning_rate": 2.5139333011425435e-06,
"loss": 0.5776,
"step": 10665
},
{
"epoch": 0.68,
"grad_norm": 0.8165357708930969,
"learning_rate": 2.5130431723182386e-06,
"loss": 0.5129,
"step": 10666
},
{
"epoch": 0.68,
"grad_norm": 0.9315398931503296,
"learning_rate": 2.512153148211038e-06,
"loss": 0.602,
"step": 10667
},
{
"epoch": 0.68,
"grad_norm": 0.97512286901474,
"learning_rate": 2.5112632288584116e-06,
"loss": 0.5975,
"step": 10668
},
{
"epoch": 0.68,
"grad_norm": 0.9549464583396912,
"learning_rate": 2.5103734142978325e-06,
"loss": 0.6342,
"step": 10669
},
{
"epoch": 0.68,
"grad_norm": 0.9069748520851135,
"learning_rate": 2.5094837045667684e-06,
"loss": 0.5671,
"step": 10670
},
{
"epoch": 0.68,
"grad_norm": 0.8227144479751587,
"learning_rate": 2.508594099702682e-06,
"loss": 0.5805,
"step": 10671
},
{
"epoch": 0.68,
"grad_norm": 0.9068019390106201,
"learning_rate": 2.5077045997430304e-06,
"loss": 0.5906,
"step": 10672
},
{
"epoch": 0.68,
"grad_norm": 0.9003850221633911,
"learning_rate": 2.5068152047252702e-06,
"loss": 0.6073,
"step": 10673
},
{
"epoch": 0.68,
"grad_norm": 0.9493726491928101,
"learning_rate": 2.5059259146868474e-06,
"loss": 0.5954,
"step": 10674
},
{
"epoch": 0.68,
"grad_norm": 0.8758067488670349,
"learning_rate": 2.5050367296652075e-06,
"loss": 0.5644,
"step": 10675
},
{
"epoch": 0.68,
"grad_norm": 0.9231355786323547,
"learning_rate": 2.504147649697791e-06,
"loss": 0.6068,
"step": 10676
},
{
"epoch": 0.68,
"grad_norm": 0.8375126123428345,
"learning_rate": 2.5032586748220354e-06,
"loss": 0.5932,
"step": 10677
},
{
"epoch": 0.68,
"grad_norm": 0.886325478553772,
"learning_rate": 2.5023698050753732e-06,
"loss": 0.6023,
"step": 10678
},
{
"epoch": 0.68,
"grad_norm": 0.8187273740768433,
"learning_rate": 2.5014810404952262e-06,
"loss": 0.5291,
"step": 10679
},
{
"epoch": 0.68,
"grad_norm": 0.8535604476928711,
"learning_rate": 2.5005923811190226e-06,
"loss": 0.5721,
"step": 10680
},
{
"epoch": 0.68,
"grad_norm": 0.8962329030036926,
"learning_rate": 2.4997038269841804e-06,
"loss": 0.5637,
"step": 10681
},
{
"epoch": 0.68,
"grad_norm": 0.9370246529579163,
"learning_rate": 2.498815378128111e-06,
"loss": 0.61,
"step": 10682
},
{
"epoch": 0.68,
"grad_norm": 0.8292384743690491,
"learning_rate": 2.497927034588225e-06,
"loss": 0.5253,
"step": 10683
},
{
"epoch": 0.68,
"grad_norm": 0.8883755803108215,
"learning_rate": 2.497038796401927e-06,
"loss": 0.5684,
"step": 10684
},
{
"epoch": 0.68,
"grad_norm": 0.833806037902832,
"learning_rate": 2.4961506636066185e-06,
"loss": 0.623,
"step": 10685
},
{
"epoch": 0.68,
"grad_norm": 0.9147443771362305,
"learning_rate": 2.495262636239697e-06,
"loss": 0.6173,
"step": 10686
},
{
"epoch": 0.68,
"grad_norm": 0.8724647760391235,
"learning_rate": 2.4943747143385503e-06,
"loss": 0.6106,
"step": 10687
},
{
"epoch": 0.68,
"grad_norm": 0.8868297934532166,
"learning_rate": 2.4934868979405667e-06,
"loss": 0.5995,
"step": 10688
},
{
"epoch": 0.68,
"grad_norm": 0.8661412000656128,
"learning_rate": 2.492599187083134e-06,
"loss": 0.6492,
"step": 10689
},
{
"epoch": 0.68,
"grad_norm": 0.8457236289978027,
"learning_rate": 2.491711581803625e-06,
"loss": 0.5432,
"step": 10690
},
{
"epoch": 0.68,
"grad_norm": 0.864971935749054,
"learning_rate": 2.490824082139415e-06,
"loss": 0.5379,
"step": 10691
},
{
"epoch": 0.68,
"grad_norm": 0.9583846926689148,
"learning_rate": 2.489936688127875e-06,
"loss": 0.6022,
"step": 10692
},
{
"epoch": 0.68,
"grad_norm": 0.8547648787498474,
"learning_rate": 2.4890493998063685e-06,
"loss": 0.5668,
"step": 10693
},
{
"epoch": 0.68,
"grad_norm": 0.8871789574623108,
"learning_rate": 2.4881622172122595e-06,
"loss": 0.6422,
"step": 10694
},
{
"epoch": 0.68,
"grad_norm": 0.9174278974533081,
"learning_rate": 2.4872751403828986e-06,
"loss": 0.6206,
"step": 10695
},
{
"epoch": 0.68,
"grad_norm": 0.9678024649620056,
"learning_rate": 2.4863881693556393e-06,
"loss": 0.5992,
"step": 10696
},
{
"epoch": 0.68,
"grad_norm": 0.8451238870620728,
"learning_rate": 2.4855013041678335e-06,
"loss": 0.544,
"step": 10697
},
{
"epoch": 0.68,
"grad_norm": 0.8987723588943481,
"learning_rate": 2.484614544856819e-06,
"loss": 0.5851,
"step": 10698
},
{
"epoch": 0.68,
"grad_norm": 0.8522927761077881,
"learning_rate": 2.483727891459935e-06,
"loss": 0.5805,
"step": 10699
},
{
"epoch": 0.68,
"grad_norm": 0.9000210762023926,
"learning_rate": 2.482841344014516e-06,
"loss": 0.5587,
"step": 10700
},
{
"epoch": 0.68,
"grad_norm": 0.8868560791015625,
"learning_rate": 2.4819549025578917e-06,
"loss": 0.5957,
"step": 10701
},
{
"epoch": 0.68,
"grad_norm": 0.8964491486549377,
"learning_rate": 2.481068567127389e-06,
"loss": 0.5865,
"step": 10702
},
{
"epoch": 0.68,
"grad_norm": 0.8719425797462463,
"learning_rate": 2.4801823377603236e-06,
"loss": 0.5505,
"step": 10703
},
{
"epoch": 0.68,
"grad_norm": 0.9729426503181458,
"learning_rate": 2.4792962144940148e-06,
"loss": 0.5863,
"step": 10704
},
{
"epoch": 0.68,
"grad_norm": 0.906240701675415,
"learning_rate": 2.4784101973657724e-06,
"loss": 0.6275,
"step": 10705
},
{
"epoch": 0.68,
"grad_norm": 0.8632292747497559,
"learning_rate": 2.4775242864129055e-06,
"loss": 0.5586,
"step": 10706
},
{
"epoch": 0.68,
"grad_norm": 0.9146695733070374,
"learning_rate": 2.4766384816727164e-06,
"loss": 0.5932,
"step": 10707
},
{
"epoch": 0.68,
"grad_norm": 0.8897523880004883,
"learning_rate": 2.475752783182504e-06,
"loss": 0.5804,
"step": 10708
},
{
"epoch": 0.68,
"grad_norm": 0.870412290096283,
"learning_rate": 2.4748671909795568e-06,
"loss": 0.5777,
"step": 10709
},
{
"epoch": 0.68,
"grad_norm": 0.8530032634735107,
"learning_rate": 2.4739817051011717e-06,
"loss": 0.5358,
"step": 10710
},
{
"epoch": 0.68,
"grad_norm": 0.8868164420127869,
"learning_rate": 2.473096325584628e-06,
"loss": 0.5883,
"step": 10711
},
{
"epoch": 0.68,
"grad_norm": 0.9638000130653381,
"learning_rate": 2.4722110524672074e-06,
"loss": 0.6171,
"step": 10712
},
{
"epoch": 0.68,
"grad_norm": 0.945044755935669,
"learning_rate": 2.4713258857861856e-06,
"loss": 0.5875,
"step": 10713
},
{
"epoch": 0.68,
"grad_norm": 0.8738934993743896,
"learning_rate": 2.4704408255788342e-06,
"loss": 0.6121,
"step": 10714
},
{
"epoch": 0.68,
"grad_norm": 0.8028507828712463,
"learning_rate": 2.4695558718824204e-06,
"loss": 0.5547,
"step": 10715
},
{
"epoch": 0.68,
"grad_norm": 0.8492550253868103,
"learning_rate": 2.468671024734208e-06,
"loss": 0.5684,
"step": 10716
},
{
"epoch": 0.68,
"grad_norm": 0.875840961933136,
"learning_rate": 2.4677862841714485e-06,
"loss": 0.5418,
"step": 10717
},
{
"epoch": 0.68,
"grad_norm": 0.8230100870132446,
"learning_rate": 2.4669016502314038e-06,
"loss": 0.5823,
"step": 10718
},
{
"epoch": 0.68,
"grad_norm": 0.8757971525192261,
"learning_rate": 2.4660171229513165e-06,
"loss": 0.5821,
"step": 10719
},
{
"epoch": 0.68,
"grad_norm": 0.8600106239318848,
"learning_rate": 2.465132702368433e-06,
"loss": 0.5618,
"step": 10720
},
{
"epoch": 0.68,
"grad_norm": 0.8039467930793762,
"learning_rate": 2.4642483885199938e-06,
"loss": 0.5544,
"step": 10721
},
{
"epoch": 0.68,
"grad_norm": 0.8692030906677246,
"learning_rate": 2.463364181443233e-06,
"loss": 0.6235,
"step": 10722
},
{
"epoch": 0.68,
"grad_norm": 0.8942098617553711,
"learning_rate": 2.4624800811753826e-06,
"loss": 0.6117,
"step": 10723
},
{
"epoch": 0.68,
"grad_norm": 0.8640264272689819,
"learning_rate": 2.4615960877536706e-06,
"loss": 0.5282,
"step": 10724
},
{
"epoch": 0.68,
"grad_norm": 0.8637884259223938,
"learning_rate": 2.4607122012153146e-06,
"loss": 0.5566,
"step": 10725
},
{
"epoch": 0.68,
"grad_norm": 0.8512043356895447,
"learning_rate": 2.459828421597534e-06,
"loss": 0.5275,
"step": 10726
},
{
"epoch": 0.68,
"grad_norm": 0.9386641383171082,
"learning_rate": 2.458944748937543e-06,
"loss": 0.619,
"step": 10727
},
{
"epoch": 0.68,
"grad_norm": 0.9329386949539185,
"learning_rate": 2.4580611832725482e-06,
"loss": 0.555,
"step": 10728
},
{
"epoch": 0.68,
"grad_norm": 0.8880377411842346,
"learning_rate": 2.4571777246397543e-06,
"loss": 0.6331,
"step": 10729
},
{
"epoch": 0.68,
"grad_norm": 0.9043840765953064,
"learning_rate": 2.456294373076361e-06,
"loss": 0.5705,
"step": 10730
},
{
"epoch": 0.68,
"grad_norm": 0.8265879154205322,
"learning_rate": 2.455411128619562e-06,
"loss": 0.5232,
"step": 10731
},
{
"epoch": 0.68,
"grad_norm": 0.9159626364707947,
"learning_rate": 2.4545279913065513e-06,
"loss": 0.6011,
"step": 10732
},
{
"epoch": 0.68,
"grad_norm": 0.9764483571052551,
"learning_rate": 2.4536449611745087e-06,
"loss": 0.6575,
"step": 10733
},
{
"epoch": 0.68,
"grad_norm": 0.9006572365760803,
"learning_rate": 2.45276203826062e-06,
"loss": 0.5808,
"step": 10734
},
{
"epoch": 0.68,
"grad_norm": 0.8400965929031372,
"learning_rate": 2.451879222602059e-06,
"loss": 0.5425,
"step": 10735
},
{
"epoch": 0.68,
"grad_norm": 1.0129814147949219,
"learning_rate": 2.4509965142360013e-06,
"loss": 0.6147,
"step": 10736
},
{
"epoch": 0.68,
"grad_norm": 1.019564151763916,
"learning_rate": 2.4501139131996122e-06,
"loss": 0.5869,
"step": 10737
},
{
"epoch": 0.68,
"grad_norm": 0.9409759044647217,
"learning_rate": 2.4492314195300583e-06,
"loss": 0.5929,
"step": 10738
},
{
"epoch": 0.68,
"grad_norm": 0.924321711063385,
"learning_rate": 2.4483490332644918e-06,
"loss": 0.602,
"step": 10739
},
{
"epoch": 0.68,
"grad_norm": 0.8794954419136047,
"learning_rate": 2.4474667544400744e-06,
"loss": 0.5726,
"step": 10740
},
{
"epoch": 0.68,
"grad_norm": 0.9003675580024719,
"learning_rate": 2.4465845830939504e-06,
"loss": 0.6345,
"step": 10741
},
{
"epoch": 0.68,
"grad_norm": 0.8394778370857239,
"learning_rate": 2.4457025192632672e-06,
"loss": 0.5132,
"step": 10742
},
{
"epoch": 0.68,
"grad_norm": 0.8338208198547363,
"learning_rate": 2.444820562985165e-06,
"loss": 0.5275,
"step": 10743
},
{
"epoch": 0.68,
"grad_norm": 0.9329877495765686,
"learning_rate": 2.443938714296781e-06,
"loss": 0.5879,
"step": 10744
},
{
"epoch": 0.68,
"grad_norm": 0.8452143669128418,
"learning_rate": 2.4430569732352444e-06,
"loss": 0.5829,
"step": 10745
},
{
"epoch": 0.68,
"grad_norm": 0.9036096930503845,
"learning_rate": 2.4421753398376865e-06,
"loss": 0.6168,
"step": 10746
},
{
"epoch": 0.68,
"grad_norm": 0.8841857314109802,
"learning_rate": 2.441293814141223e-06,
"loss": 0.5878,
"step": 10747
},
{
"epoch": 0.68,
"grad_norm": 0.8716691732406616,
"learning_rate": 2.4404123961829795e-06,
"loss": 0.5744,
"step": 10748
},
{
"epoch": 0.68,
"grad_norm": 0.8817796111106873,
"learning_rate": 2.4395310860000644e-06,
"loss": 0.5486,
"step": 10749
},
{
"epoch": 0.68,
"grad_norm": 0.9194137454032898,
"learning_rate": 2.438649883629588e-06,
"loss": 0.5834,
"step": 10750
},
{
"epoch": 0.68,
"grad_norm": 0.9086952805519104,
"learning_rate": 2.437768789108656e-06,
"loss": 0.6189,
"step": 10751
},
{
"epoch": 0.68,
"grad_norm": 0.9165956377983093,
"learning_rate": 2.4368878024743638e-06,
"loss": 0.5459,
"step": 10752
},
{
"epoch": 0.68,
"grad_norm": 0.9193375110626221,
"learning_rate": 2.4360069237638114e-06,
"loss": 0.5939,
"step": 10753
},
{
"epoch": 0.68,
"grad_norm": 0.9131724238395691,
"learning_rate": 2.43512615301409e-06,
"loss": 0.5554,
"step": 10754
},
{
"epoch": 0.68,
"grad_norm": 0.9217658638954163,
"learning_rate": 2.434245490262282e-06,
"loss": 0.5851,
"step": 10755
},
{
"epoch": 0.68,
"grad_norm": 0.8388816714286804,
"learning_rate": 2.4333649355454704e-06,
"loss": 0.6016,
"step": 10756
},
{
"epoch": 0.68,
"grad_norm": 0.8548718690872192,
"learning_rate": 2.4324844889007328e-06,
"loss": 0.628,
"step": 10757
},
{
"epoch": 0.68,
"grad_norm": 0.8744621276855469,
"learning_rate": 2.4316041503651417e-06,
"loss": 0.5213,
"step": 10758
},
{
"epoch": 0.68,
"grad_norm": 0.9566894769668579,
"learning_rate": 2.430723919975767e-06,
"loss": 0.5779,
"step": 10759
},
{
"epoch": 0.68,
"grad_norm": 0.8871926665306091,
"learning_rate": 2.4298437977696658e-06,
"loss": 0.6031,
"step": 10760
},
{
"epoch": 0.68,
"grad_norm": 0.8938164114952087,
"learning_rate": 2.428963783783904e-06,
"loss": 0.5478,
"step": 10761
},
{
"epoch": 0.68,
"grad_norm": 0.9035009741783142,
"learning_rate": 2.4280838780555347e-06,
"loss": 0.5693,
"step": 10762
},
{
"epoch": 0.68,
"grad_norm": 0.9277425408363342,
"learning_rate": 2.427204080621605e-06,
"loss": 0.5697,
"step": 10763
},
{
"epoch": 0.68,
"grad_norm": 0.8887539505958557,
"learning_rate": 2.426324391519161e-06,
"loss": 0.5116,
"step": 10764
},
{
"epoch": 0.68,
"grad_norm": 0.8846824765205383,
"learning_rate": 2.4254448107852434e-06,
"loss": 0.6138,
"step": 10765
},
{
"epoch": 0.68,
"grad_norm": 0.8920591473579407,
"learning_rate": 2.424565338456889e-06,
"loss": 0.5854,
"step": 10766
},
{
"epoch": 0.68,
"grad_norm": 0.8790110945701599,
"learning_rate": 2.4236859745711305e-06,
"loss": 0.6154,
"step": 10767
},
{
"epoch": 0.68,
"grad_norm": 0.9227504730224609,
"learning_rate": 2.4228067191649917e-06,
"loss": 0.5714,
"step": 10768
},
{
"epoch": 0.68,
"grad_norm": 0.8249009847640991,
"learning_rate": 2.421927572275494e-06,
"loss": 0.5788,
"step": 10769
},
{
"epoch": 0.68,
"grad_norm": 0.8537331223487854,
"learning_rate": 2.4210485339396627e-06,
"loss": 0.5664,
"step": 10770
},
{
"epoch": 0.68,
"grad_norm": 0.9200884103775024,
"learning_rate": 2.4201696041945033e-06,
"loss": 0.6134,
"step": 10771
},
{
"epoch": 0.68,
"grad_norm": 0.8930040001869202,
"learning_rate": 2.419290783077028e-06,
"loss": 0.5332,
"step": 10772
},
{
"epoch": 0.68,
"grad_norm": 0.932697057723999,
"learning_rate": 2.41841207062424e-06,
"loss": 0.5486,
"step": 10773
},
{
"epoch": 0.68,
"grad_norm": 0.9301908016204834,
"learning_rate": 2.4175334668731383e-06,
"loss": 0.6119,
"step": 10774
},
{
"epoch": 0.68,
"grad_norm": 0.8337537050247192,
"learning_rate": 2.416654971860721e-06,
"loss": 0.5692,
"step": 10775
},
{
"epoch": 0.68,
"grad_norm": 0.881458580493927,
"learning_rate": 2.415776585623974e-06,
"loss": 0.5814,
"step": 10776
},
{
"epoch": 0.68,
"grad_norm": 0.9348959922790527,
"learning_rate": 2.4148983081998834e-06,
"loss": 0.5929,
"step": 10777
},
{
"epoch": 0.68,
"grad_norm": 0.8827102184295654,
"learning_rate": 2.414020139625436e-06,
"loss": 0.5264,
"step": 10778
},
{
"epoch": 0.68,
"grad_norm": 0.8925660848617554,
"learning_rate": 2.413142079937602e-06,
"loss": 0.6432,
"step": 10779
},
{
"epoch": 0.68,
"grad_norm": 0.9171149134635925,
"learning_rate": 2.4122641291733567e-06,
"loss": 0.5957,
"step": 10780
},
{
"epoch": 0.68,
"grad_norm": 0.9240100383758545,
"learning_rate": 2.4113862873696687e-06,
"loss": 0.6231,
"step": 10781
},
{
"epoch": 0.68,
"grad_norm": 0.8504339456558228,
"learning_rate": 2.410508554563495e-06,
"loss": 0.5734,
"step": 10782
},
{
"epoch": 0.68,
"grad_norm": 0.8871136903762817,
"learning_rate": 2.4096309307918013e-06,
"loss": 0.5962,
"step": 10783
},
{
"epoch": 0.68,
"grad_norm": 0.9689726829528809,
"learning_rate": 2.4087534160915364e-06,
"loss": 0.6037,
"step": 10784
},
{
"epoch": 0.68,
"grad_norm": 0.8371800780296326,
"learning_rate": 2.407876010499651e-06,
"loss": 0.6064,
"step": 10785
},
{
"epoch": 0.68,
"grad_norm": 0.9337158799171448,
"learning_rate": 2.4069987140530893e-06,
"loss": 0.5893,
"step": 10786
},
{
"epoch": 0.68,
"grad_norm": 0.8576418161392212,
"learning_rate": 2.4061215267887915e-06,
"loss": 0.5729,
"step": 10787
},
{
"epoch": 0.68,
"grad_norm": 0.859890341758728,
"learning_rate": 2.4052444487436925e-06,
"loss": 0.5478,
"step": 10788
},
{
"epoch": 0.68,
"grad_norm": 0.9343597292900085,
"learning_rate": 2.4043674799547252e-06,
"loss": 0.5488,
"step": 10789
},
{
"epoch": 0.68,
"grad_norm": 0.9209311008453369,
"learning_rate": 2.4034906204588104e-06,
"loss": 0.5923,
"step": 10790
},
{
"epoch": 0.68,
"grad_norm": 0.8784055709838867,
"learning_rate": 2.4026138702928763e-06,
"loss": 0.5471,
"step": 10791
},
{
"epoch": 0.68,
"grad_norm": 0.8925771117210388,
"learning_rate": 2.4017372294938347e-06,
"loss": 0.6078,
"step": 10792
},
{
"epoch": 0.68,
"grad_norm": 0.9076485633850098,
"learning_rate": 2.4008606980985994e-06,
"loss": 0.6088,
"step": 10793
},
{
"epoch": 0.68,
"grad_norm": 0.9135996699333191,
"learning_rate": 2.399984276144079e-06,
"loss": 0.6064,
"step": 10794
},
{
"epoch": 0.68,
"grad_norm": 0.8818286657333374,
"learning_rate": 2.3991079636671755e-06,
"loss": 0.5986,
"step": 10795
},
{
"epoch": 0.68,
"grad_norm": 0.8494421243667603,
"learning_rate": 2.398231760704788e-06,
"loss": 0.5909,
"step": 10796
},
{
"epoch": 0.68,
"grad_norm": 0.8972966074943542,
"learning_rate": 2.397355667293812e-06,
"loss": 0.5537,
"step": 10797
},
{
"epoch": 0.68,
"grad_norm": 0.884488582611084,
"learning_rate": 2.396479683471133e-06,
"loss": 0.6015,
"step": 10798
},
{
"epoch": 0.68,
"grad_norm": 0.8640215992927551,
"learning_rate": 2.395603809273635e-06,
"loss": 0.6178,
"step": 10799
},
{
"epoch": 0.68,
"grad_norm": 0.9510016441345215,
"learning_rate": 2.3947280447382055e-06,
"loss": 0.6046,
"step": 10800
},
{
"epoch": 0.68,
"grad_norm": 0.8891522288322449,
"learning_rate": 2.3938523899017124e-06,
"loss": 0.5477,
"step": 10801
},
{
"epoch": 0.68,
"grad_norm": 0.8761363625526428,
"learning_rate": 2.392976844801029e-06,
"loss": 0.5773,
"step": 10802
},
{
"epoch": 0.68,
"grad_norm": 0.8561110496520996,
"learning_rate": 2.3921014094730216e-06,
"loss": 0.5879,
"step": 10803
},
{
"epoch": 0.68,
"grad_norm": 0.8632552623748779,
"learning_rate": 2.3912260839545514e-06,
"loss": 0.5905,
"step": 10804
},
{
"epoch": 0.68,
"grad_norm": 0.8456622958183289,
"learning_rate": 2.390350868282478e-06,
"loss": 0.6465,
"step": 10805
},
{
"epoch": 0.68,
"grad_norm": 0.9606796503067017,
"learning_rate": 2.389475762493649e-06,
"loss": 0.5823,
"step": 10806
},
{
"epoch": 0.68,
"grad_norm": 0.8369455337524414,
"learning_rate": 2.3886007666249124e-06,
"loss": 0.5502,
"step": 10807
},
{
"epoch": 0.68,
"grad_norm": 0.8868955373764038,
"learning_rate": 2.387725880713117e-06,
"loss": 0.5796,
"step": 10808
},
{
"epoch": 0.68,
"grad_norm": 0.9382752180099487,
"learning_rate": 2.3868511047950955e-06,
"loss": 0.625,
"step": 10809
},
{
"epoch": 0.68,
"grad_norm": 0.9117169380187988,
"learning_rate": 2.3859764389076834e-06,
"loss": 0.5422,
"step": 10810
},
{
"epoch": 0.68,
"grad_norm": 0.8499246835708618,
"learning_rate": 2.3851018830877115e-06,
"loss": 0.5505,
"step": 10811
},
{
"epoch": 0.68,
"grad_norm": 0.9279850125312805,
"learning_rate": 2.3842274373719994e-06,
"loss": 0.5964,
"step": 10812
},
{
"epoch": 0.69,
"grad_norm": 0.9057535529136658,
"learning_rate": 2.383353101797374e-06,
"loss": 0.5497,
"step": 10813
},
{
"epoch": 0.69,
"grad_norm": 0.8557054996490479,
"learning_rate": 2.3824788764006446e-06,
"loss": 0.5935,
"step": 10814
},
{
"epoch": 0.69,
"grad_norm": 0.8106154203414917,
"learning_rate": 2.3816047612186243e-06,
"loss": 0.5335,
"step": 10815
},
{
"epoch": 0.69,
"grad_norm": 0.8800660371780396,
"learning_rate": 2.3807307562881188e-06,
"loss": 0.5915,
"step": 10816
},
{
"epoch": 0.69,
"grad_norm": 0.901800811290741,
"learning_rate": 2.3798568616459295e-06,
"loss": 0.6108,
"step": 10817
},
{
"epoch": 0.69,
"grad_norm": 0.9201937913894653,
"learning_rate": 2.378983077328853e-06,
"loss": 0.563,
"step": 10818
},
{
"epoch": 0.69,
"grad_norm": 0.8660761713981628,
"learning_rate": 2.378109403373683e-06,
"loss": 0.5706,
"step": 10819
},
{
"epoch": 0.69,
"grad_norm": 0.8939432501792908,
"learning_rate": 2.3772358398172013e-06,
"loss": 0.6153,
"step": 10820
},
{
"epoch": 0.69,
"grad_norm": 0.8241981863975525,
"learning_rate": 2.3763623866961984e-06,
"loss": 0.5213,
"step": 10821
},
{
"epoch": 0.69,
"grad_norm": 0.9262666702270508,
"learning_rate": 2.375489044047446e-06,
"loss": 0.5569,
"step": 10822
},
{
"epoch": 0.69,
"grad_norm": 0.9339314699172974,
"learning_rate": 2.37461581190772e-06,
"loss": 0.62,
"step": 10823
},
{
"epoch": 0.69,
"grad_norm": 0.8499922156333923,
"learning_rate": 2.37374269031379e-06,
"loss": 0.568,
"step": 10824
},
{
"epoch": 0.69,
"grad_norm": 0.8106879591941833,
"learning_rate": 2.3728696793024187e-06,
"loss": 0.5473,
"step": 10825
},
{
"epoch": 0.69,
"grad_norm": 0.9187078475952148,
"learning_rate": 2.371996778910366e-06,
"loss": 0.5985,
"step": 10826
},
{
"epoch": 0.69,
"grad_norm": 0.9437380433082581,
"learning_rate": 2.3711239891743886e-06,
"loss": 0.5821,
"step": 10827
},
{
"epoch": 0.69,
"grad_norm": 0.8216588497161865,
"learning_rate": 2.370251310131233e-06,
"loss": 0.4918,
"step": 10828
},
{
"epoch": 0.69,
"grad_norm": 0.8683214783668518,
"learning_rate": 2.369378741817647e-06,
"loss": 0.5689,
"step": 10829
},
{
"epoch": 0.69,
"grad_norm": 0.8889510035514832,
"learning_rate": 2.3685062842703697e-06,
"loss": 0.5838,
"step": 10830
},
{
"epoch": 0.69,
"grad_norm": 0.9381679892539978,
"learning_rate": 2.3676339375261394e-06,
"loss": 0.6048,
"step": 10831
},
{
"epoch": 0.69,
"grad_norm": 0.9227031469345093,
"learning_rate": 2.3667617016216885e-06,
"loss": 0.5528,
"step": 10832
},
{
"epoch": 0.69,
"grad_norm": 0.8656090497970581,
"learning_rate": 2.365889576593738e-06,
"loss": 0.5258,
"step": 10833
},
{
"epoch": 0.69,
"grad_norm": 0.9459344148635864,
"learning_rate": 2.365017562479016e-06,
"loss": 0.5723,
"step": 10834
},
{
"epoch": 0.69,
"grad_norm": 0.8714156150817871,
"learning_rate": 2.36414565931424e-06,
"loss": 0.5408,
"step": 10835
},
{
"epoch": 0.69,
"grad_norm": 0.9365402460098267,
"learning_rate": 2.3632738671361187e-06,
"loss": 0.5667,
"step": 10836
},
{
"epoch": 0.69,
"grad_norm": 0.952563464641571,
"learning_rate": 2.362402185981363e-06,
"loss": 0.5606,
"step": 10837
},
{
"epoch": 0.69,
"grad_norm": 0.8872142434120178,
"learning_rate": 2.3615306158866745e-06,
"loss": 0.6201,
"step": 10838
},
{
"epoch": 0.69,
"grad_norm": 0.9767509698867798,
"learning_rate": 2.360659156888754e-06,
"loss": 0.6305,
"step": 10839
},
{
"epoch": 0.69,
"grad_norm": 0.8186350464820862,
"learning_rate": 2.359787809024297e-06,
"loss": 0.5489,
"step": 10840
},
{
"epoch": 0.69,
"grad_norm": 0.891994297504425,
"learning_rate": 2.358916572329986e-06,
"loss": 0.5613,
"step": 10841
},
{
"epoch": 0.69,
"grad_norm": 0.8956696391105652,
"learning_rate": 2.3580454468425136e-06,
"loss": 0.558,
"step": 10842
},
{
"epoch": 0.69,
"grad_norm": 0.8858946561813354,
"learning_rate": 2.357174432598558e-06,
"loss": 0.5871,
"step": 10843
},
{
"epoch": 0.69,
"grad_norm": 0.9084068536758423,
"learning_rate": 2.356303529634791e-06,
"loss": 0.6036,
"step": 10844
},
{
"epoch": 0.69,
"grad_norm": 0.8893341422080994,
"learning_rate": 2.355432737987886e-06,
"loss": 0.6102,
"step": 10845
},
{
"epoch": 0.69,
"grad_norm": 0.8843010067939758,
"learning_rate": 2.3545620576945088e-06,
"loss": 0.5974,
"step": 10846
},
{
"epoch": 0.69,
"grad_norm": 0.9258445501327515,
"learning_rate": 2.3536914887913203e-06,
"loss": 0.5951,
"step": 10847
},
{
"epoch": 0.69,
"grad_norm": 0.8948039412498474,
"learning_rate": 2.3528210313149793e-06,
"loss": 0.556,
"step": 10848
},
{
"epoch": 0.69,
"grad_norm": 0.9238641262054443,
"learning_rate": 2.351950685302134e-06,
"loss": 0.6082,
"step": 10849
},
{
"epoch": 0.69,
"grad_norm": 0.9253204464912415,
"learning_rate": 2.351080450789431e-06,
"loss": 0.5741,
"step": 10850
},
{
"epoch": 0.69,
"grad_norm": 0.8795269727706909,
"learning_rate": 2.3502103278135203e-06,
"loss": 0.5695,
"step": 10851
},
{
"epoch": 0.69,
"grad_norm": 0.905057430267334,
"learning_rate": 2.349340316411032e-06,
"loss": 0.5569,
"step": 10852
},
{
"epoch": 0.69,
"grad_norm": 0.8453585505485535,
"learning_rate": 2.3484704166186024e-06,
"loss": 0.5595,
"step": 10853
},
{
"epoch": 0.69,
"grad_norm": 0.9011979699134827,
"learning_rate": 2.347600628472859e-06,
"loss": 0.5623,
"step": 10854
},
{
"epoch": 0.69,
"grad_norm": 0.874869704246521,
"learning_rate": 2.3467309520104265e-06,
"loss": 0.5435,
"step": 10855
},
{
"epoch": 0.69,
"grad_norm": 0.965836763381958,
"learning_rate": 2.3458613872679255e-06,
"loss": 0.6046,
"step": 10856
},
{
"epoch": 0.69,
"grad_norm": 0.8620368242263794,
"learning_rate": 2.344991934281966e-06,
"loss": 0.5409,
"step": 10857
},
{
"epoch": 0.69,
"grad_norm": 0.9628636240959167,
"learning_rate": 2.344122593089161e-06,
"loss": 0.6049,
"step": 10858
},
{
"epoch": 0.69,
"grad_norm": 0.8034865856170654,
"learning_rate": 2.3432533637261135e-06,
"loss": 0.5112,
"step": 10859
},
{
"epoch": 0.69,
"grad_norm": 0.8890984058380127,
"learning_rate": 2.3423842462294257e-06,
"loss": 0.5682,
"step": 10860
},
{
"epoch": 0.69,
"grad_norm": 0.9449944496154785,
"learning_rate": 2.341515240635691e-06,
"loss": 0.5434,
"step": 10861
},
{
"epoch": 0.69,
"grad_norm": 0.9405069947242737,
"learning_rate": 2.340646346981504e-06,
"loss": 0.5712,
"step": 10862
},
{
"epoch": 0.69,
"grad_norm": 0.9340393543243408,
"learning_rate": 2.339777565303444e-06,
"loss": 0.6118,
"step": 10863
},
{
"epoch": 0.69,
"grad_norm": 0.8473518490791321,
"learning_rate": 2.3389088956380982e-06,
"loss": 0.4942,
"step": 10864
},
{
"epoch": 0.69,
"grad_norm": 0.8445674777030945,
"learning_rate": 2.338040338022044e-06,
"loss": 0.548,
"step": 10865
},
{
"epoch": 0.69,
"grad_norm": 0.9048270583152771,
"learning_rate": 2.3371718924918487e-06,
"loss": 0.5564,
"step": 10866
},
{
"epoch": 0.69,
"grad_norm": 0.9229834675788879,
"learning_rate": 2.3363035590840814e-06,
"loss": 0.5522,
"step": 10867
},
{
"epoch": 0.69,
"grad_norm": 0.89118891954422,
"learning_rate": 2.3354353378353056e-06,
"loss": 0.5978,
"step": 10868
},
{
"epoch": 0.69,
"grad_norm": 0.8255208730697632,
"learning_rate": 2.334567228782078e-06,
"loss": 0.5616,
"step": 10869
},
{
"epoch": 0.69,
"grad_norm": 0.9787054061889648,
"learning_rate": 2.3336992319609534e-06,
"loss": 0.6014,
"step": 10870
},
{
"epoch": 0.69,
"grad_norm": 0.8942728638648987,
"learning_rate": 2.3328313474084755e-06,
"loss": 0.6,
"step": 10871
},
{
"epoch": 0.69,
"grad_norm": 0.8750494718551636,
"learning_rate": 2.3319635751611937e-06,
"loss": 0.5883,
"step": 10872
},
{
"epoch": 0.69,
"grad_norm": 0.8585361838340759,
"learning_rate": 2.3310959152556453e-06,
"loss": 0.6229,
"step": 10873
},
{
"epoch": 0.69,
"grad_norm": 0.9201778173446655,
"learning_rate": 2.3302283677283618e-06,
"loss": 0.596,
"step": 10874
},
{
"epoch": 0.69,
"grad_norm": 0.8352607488632202,
"learning_rate": 2.3293609326158745e-06,
"loss": 0.5342,
"step": 10875
},
{
"epoch": 0.69,
"grad_norm": 0.8795650005340576,
"learning_rate": 2.328493609954707e-06,
"loss": 0.5886,
"step": 10876
},
{
"epoch": 0.69,
"grad_norm": 0.9259792566299438,
"learning_rate": 2.3276263997813812e-06,
"loss": 0.5542,
"step": 10877
},
{
"epoch": 0.69,
"grad_norm": 0.933414876461029,
"learning_rate": 2.3267593021324127e-06,
"loss": 0.6461,
"step": 10878
},
{
"epoch": 0.69,
"grad_norm": 0.9607113599777222,
"learning_rate": 2.3258923170443087e-06,
"loss": 0.6609,
"step": 10879
},
{
"epoch": 0.69,
"grad_norm": 0.8267933130264282,
"learning_rate": 2.3250254445535743e-06,
"loss": 0.5456,
"step": 10880
},
{
"epoch": 0.69,
"grad_norm": 0.884596586227417,
"learning_rate": 2.324158684696717e-06,
"loss": 0.5046,
"step": 10881
},
{
"epoch": 0.69,
"grad_norm": 0.8587662577629089,
"learning_rate": 2.323292037510227e-06,
"loss": 0.4938,
"step": 10882
},
{
"epoch": 0.69,
"grad_norm": 0.9369049668312073,
"learning_rate": 2.3224255030305977e-06,
"loss": 0.5844,
"step": 10883
},
{
"epoch": 0.69,
"grad_norm": 0.830431342124939,
"learning_rate": 2.321559081294316e-06,
"loss": 0.5372,
"step": 10884
},
{
"epoch": 0.69,
"grad_norm": 0.9005808234214783,
"learning_rate": 2.3206927723378638e-06,
"loss": 0.5994,
"step": 10885
},
{
"epoch": 0.69,
"grad_norm": 0.9264594912528992,
"learning_rate": 2.3198265761977196e-06,
"loss": 0.5876,
"step": 10886
},
{
"epoch": 0.69,
"grad_norm": 0.871841549873352,
"learning_rate": 2.3189604929103533e-06,
"loss": 0.5646,
"step": 10887
},
{
"epoch": 0.69,
"grad_norm": 0.9147515296936035,
"learning_rate": 2.318094522512232e-06,
"loss": 0.6012,
"step": 10888
},
{
"epoch": 0.69,
"grad_norm": 0.8173208236694336,
"learning_rate": 2.3172286650398247e-06,
"loss": 0.5651,
"step": 10889
},
{
"epoch": 0.69,
"grad_norm": 0.8764269351959229,
"learning_rate": 2.3163629205295833e-06,
"loss": 0.5715,
"step": 10890
},
{
"epoch": 0.69,
"grad_norm": 0.8883751034736633,
"learning_rate": 2.3154972890179638e-06,
"loss": 0.563,
"step": 10891
},
{
"epoch": 0.69,
"grad_norm": 0.941449761390686,
"learning_rate": 2.3146317705414168e-06,
"loss": 0.5512,
"step": 10892
},
{
"epoch": 0.69,
"grad_norm": 0.8946614861488342,
"learning_rate": 2.31376636513638e-06,
"loss": 0.5787,
"step": 10893
},
{
"epoch": 0.69,
"grad_norm": 0.8711824417114258,
"learning_rate": 2.3129010728393012e-06,
"loss": 0.6297,
"step": 10894
},
{
"epoch": 0.69,
"grad_norm": 0.9191935658454895,
"learning_rate": 2.3120358936866084e-06,
"loss": 0.6161,
"step": 10895
},
{
"epoch": 0.69,
"grad_norm": 0.9106520414352417,
"learning_rate": 2.3111708277147333e-06,
"loss": 0.5355,
"step": 10896
},
{
"epoch": 0.69,
"grad_norm": 0.9305688142776489,
"learning_rate": 2.310305874960101e-06,
"loss": 0.6004,
"step": 10897
},
{
"epoch": 0.69,
"grad_norm": 0.9202895760536194,
"learning_rate": 2.3094410354591314e-06,
"loss": 0.6412,
"step": 10898
},
{
"epoch": 0.69,
"grad_norm": 0.9189572930335999,
"learning_rate": 2.30857630924824e-06,
"loss": 0.5783,
"step": 10899
},
{
"epoch": 0.69,
"grad_norm": 0.8673662543296814,
"learning_rate": 2.3077116963638396e-06,
"loss": 0.5999,
"step": 10900
},
{
"epoch": 0.69,
"grad_norm": 0.8311372399330139,
"learning_rate": 2.3068471968423296e-06,
"loss": 0.5998,
"step": 10901
},
{
"epoch": 0.69,
"grad_norm": 0.9261046051979065,
"learning_rate": 2.305982810720119e-06,
"loss": 0.6344,
"step": 10902
},
{
"epoch": 0.69,
"grad_norm": 0.8744479417800903,
"learning_rate": 2.3051185380335995e-06,
"loss": 0.556,
"step": 10903
},
{
"epoch": 0.69,
"grad_norm": 0.8244556784629822,
"learning_rate": 2.304254378819163e-06,
"loss": 0.5574,
"step": 10904
},
{
"epoch": 0.69,
"grad_norm": 0.8999570608139038,
"learning_rate": 2.3033903331131986e-06,
"loss": 0.609,
"step": 10905
},
{
"epoch": 0.69,
"grad_norm": 0.9004625082015991,
"learning_rate": 2.3025264009520833e-06,
"loss": 0.596,
"step": 10906
},
{
"epoch": 0.69,
"grad_norm": 0.8306798338890076,
"learning_rate": 2.3016625823721985e-06,
"loss": 0.5895,
"step": 10907
},
{
"epoch": 0.69,
"grad_norm": 0.9338074922561646,
"learning_rate": 2.300798877409918e-06,
"loss": 0.6051,
"step": 10908
},
{
"epoch": 0.69,
"grad_norm": 0.9324320554733276,
"learning_rate": 2.2999352861016042e-06,
"loss": 0.5728,
"step": 10909
},
{
"epoch": 0.69,
"grad_norm": 0.8564440608024597,
"learning_rate": 2.299071808483623e-06,
"loss": 0.5535,
"step": 10910
},
{
"epoch": 0.69,
"grad_norm": 0.9293292164802551,
"learning_rate": 2.2982084445923327e-06,
"loss": 0.625,
"step": 10911
},
{
"epoch": 0.69,
"grad_norm": 0.8664717078208923,
"learning_rate": 2.297345194464086e-06,
"loss": 0.5831,
"step": 10912
},
{
"epoch": 0.69,
"grad_norm": 0.8852226734161377,
"learning_rate": 2.2964820581352325e-06,
"loss": 0.6052,
"step": 10913
},
{
"epoch": 0.69,
"grad_norm": 0.8343265056610107,
"learning_rate": 2.295619035642111e-06,
"loss": 0.5413,
"step": 10914
},
{
"epoch": 0.69,
"grad_norm": 0.8824000358581543,
"learning_rate": 2.294756127021066e-06,
"loss": 0.5667,
"step": 10915
},
{
"epoch": 0.69,
"grad_norm": 0.9818698763847351,
"learning_rate": 2.2938933323084315e-06,
"loss": 0.6607,
"step": 10916
},
{
"epoch": 0.69,
"grad_norm": 0.8719751238822937,
"learning_rate": 2.293030651540534e-06,
"loss": 0.6078,
"step": 10917
},
{
"epoch": 0.69,
"grad_norm": 0.8655606508255005,
"learning_rate": 2.2921680847536976e-06,
"loss": 0.5596,
"step": 10918
},
{
"epoch": 0.69,
"grad_norm": 0.9295132160186768,
"learning_rate": 2.2913056319842436e-06,
"loss": 0.5886,
"step": 10919
},
{
"epoch": 0.69,
"grad_norm": 0.8664971590042114,
"learning_rate": 2.2904432932684865e-06,
"loss": 0.5728,
"step": 10920
},
{
"epoch": 0.69,
"grad_norm": 0.857593297958374,
"learning_rate": 2.289581068642737e-06,
"loss": 0.5657,
"step": 10921
},
{
"epoch": 0.69,
"grad_norm": 0.9390791058540344,
"learning_rate": 2.2887189581433016e-06,
"loss": 0.5587,
"step": 10922
},
{
"epoch": 0.69,
"grad_norm": 0.9358313679695129,
"learning_rate": 2.287856961806475e-06,
"loss": 0.6039,
"step": 10923
},
{
"epoch": 0.69,
"grad_norm": 0.862331211566925,
"learning_rate": 2.286995079668561e-06,
"loss": 0.5494,
"step": 10924
},
{
"epoch": 0.69,
"grad_norm": 0.841224193572998,
"learning_rate": 2.2861333117658442e-06,
"loss": 0.5516,
"step": 10925
},
{
"epoch": 0.69,
"grad_norm": 0.8751315474510193,
"learning_rate": 2.2852716581346124e-06,
"loss": 0.6054,
"step": 10926
},
{
"epoch": 0.69,
"grad_norm": 0.8290528655052185,
"learning_rate": 2.2844101188111477e-06,
"loss": 0.5849,
"step": 10927
},
{
"epoch": 0.69,
"grad_norm": 0.9079095125198364,
"learning_rate": 2.283548693831726e-06,
"loss": 0.5642,
"step": 10928
},
{
"epoch": 0.69,
"grad_norm": 0.9231603145599365,
"learning_rate": 2.2826873832326192e-06,
"loss": 0.5922,
"step": 10929
},
{
"epoch": 0.69,
"grad_norm": 0.9110752940177917,
"learning_rate": 2.2818261870500954e-06,
"loss": 0.5498,
"step": 10930
},
{
"epoch": 0.69,
"grad_norm": 0.8755868077278137,
"learning_rate": 2.280965105320411e-06,
"loss": 0.5596,
"step": 10931
},
{
"epoch": 0.69,
"grad_norm": 0.8865872621536255,
"learning_rate": 2.280104138079831e-06,
"loss": 0.6292,
"step": 10932
},
{
"epoch": 0.69,
"grad_norm": 0.8371679186820984,
"learning_rate": 2.2792432853646023e-06,
"loss": 0.5717,
"step": 10933
},
{
"epoch": 0.69,
"grad_norm": 0.9466820359230042,
"learning_rate": 2.2783825472109743e-06,
"loss": 0.5489,
"step": 10934
},
{
"epoch": 0.69,
"grad_norm": 0.8898562788963318,
"learning_rate": 2.277521923655189e-06,
"loss": 0.599,
"step": 10935
},
{
"epoch": 0.69,
"grad_norm": 0.904425323009491,
"learning_rate": 2.276661414733485e-06,
"loss": 0.5712,
"step": 10936
},
{
"epoch": 0.69,
"grad_norm": 0.936082661151886,
"learning_rate": 2.2758010204820945e-06,
"loss": 0.6113,
"step": 10937
},
{
"epoch": 0.69,
"grad_norm": 0.8919061422348022,
"learning_rate": 2.2749407409372487e-06,
"loss": 0.5971,
"step": 10938
},
{
"epoch": 0.69,
"grad_norm": 0.884014368057251,
"learning_rate": 2.2740805761351664e-06,
"loss": 0.5329,
"step": 10939
},
{
"epoch": 0.69,
"grad_norm": 0.9374119639396667,
"learning_rate": 2.273220526112068e-06,
"loss": 0.5739,
"step": 10940
},
{
"epoch": 0.69,
"grad_norm": 0.8741660714149475,
"learning_rate": 2.272360590904168e-06,
"loss": 0.5725,
"step": 10941
},
{
"epoch": 0.69,
"grad_norm": 0.8772330284118652,
"learning_rate": 2.2715007705476744e-06,
"loss": 0.5734,
"step": 10942
},
{
"epoch": 0.69,
"grad_norm": 0.9191374182701111,
"learning_rate": 2.2706410650787937e-06,
"loss": 0.5994,
"step": 10943
},
{
"epoch": 0.69,
"grad_norm": 0.8320372700691223,
"learning_rate": 2.2697814745337186e-06,
"loss": 0.5521,
"step": 10944
},
{
"epoch": 0.69,
"grad_norm": 0.835684597492218,
"learning_rate": 2.2689219989486506e-06,
"loss": 0.6022,
"step": 10945
},
{
"epoch": 0.69,
"grad_norm": 0.9576183557510376,
"learning_rate": 2.2680626383597782e-06,
"loss": 0.63,
"step": 10946
},
{
"epoch": 0.69,
"grad_norm": 0.877859354019165,
"learning_rate": 2.267203392803282e-06,
"loss": 0.5517,
"step": 10947
},
{
"epoch": 0.69,
"grad_norm": 0.9066639542579651,
"learning_rate": 2.266344262315345e-06,
"loss": 0.6017,
"step": 10948
},
{
"epoch": 0.69,
"grad_norm": 1.0002546310424805,
"learning_rate": 2.2654852469321405e-06,
"loss": 0.581,
"step": 10949
},
{
"epoch": 0.69,
"grad_norm": 1.0141836404800415,
"learning_rate": 2.26462634668984e-06,
"loss": 0.6754,
"step": 10950
},
{
"epoch": 0.69,
"grad_norm": 0.8675405979156494,
"learning_rate": 2.2637675616246103e-06,
"loss": 0.6055,
"step": 10951
},
{
"epoch": 0.69,
"grad_norm": 0.8971235752105713,
"learning_rate": 2.262908891772608e-06,
"loss": 0.6545,
"step": 10952
},
{
"epoch": 0.69,
"grad_norm": 0.8403980731964111,
"learning_rate": 2.2620503371699886e-06,
"loss": 0.5979,
"step": 10953
},
{
"epoch": 0.69,
"grad_norm": 0.948631227016449,
"learning_rate": 2.261191897852909e-06,
"loss": 0.5405,
"step": 10954
},
{
"epoch": 0.69,
"grad_norm": 0.8814859986305237,
"learning_rate": 2.260333573857509e-06,
"loss": 0.6071,
"step": 10955
},
{
"epoch": 0.69,
"grad_norm": 0.8945904970169067,
"learning_rate": 2.2594753652199313e-06,
"loss": 0.6018,
"step": 10956
},
{
"epoch": 0.69,
"grad_norm": 0.8850582838058472,
"learning_rate": 2.2586172719763126e-06,
"loss": 0.5572,
"step": 10957
},
{
"epoch": 0.69,
"grad_norm": 0.8527590036392212,
"learning_rate": 2.2577592941627842e-06,
"loss": 0.5759,
"step": 10958
},
{
"epoch": 0.69,
"grad_norm": 0.870134711265564,
"learning_rate": 2.2569014318154735e-06,
"loss": 0.576,
"step": 10959
},
{
"epoch": 0.69,
"grad_norm": 1.019551396369934,
"learning_rate": 2.2560436849704996e-06,
"loss": 0.6245,
"step": 10960
},
{
"epoch": 0.69,
"grad_norm": 0.9043488502502441,
"learning_rate": 2.255186053663979e-06,
"loss": 0.6191,
"step": 10961
},
{
"epoch": 0.69,
"grad_norm": 0.8273271918296814,
"learning_rate": 2.2543285379320283e-06,
"loss": 0.5481,
"step": 10962
},
{
"epoch": 0.69,
"grad_norm": 0.833625078201294,
"learning_rate": 2.2534711378107498e-06,
"loss": 0.5749,
"step": 10963
},
{
"epoch": 0.69,
"grad_norm": 0.9222172498703003,
"learning_rate": 2.2526138533362475e-06,
"loss": 0.5493,
"step": 10964
},
{
"epoch": 0.69,
"grad_norm": 0.9397459626197815,
"learning_rate": 2.2517566845446182e-06,
"loss": 0.5928,
"step": 10965
},
{
"epoch": 0.69,
"grad_norm": 0.8722630143165588,
"learning_rate": 2.2508996314719544e-06,
"loss": 0.5606,
"step": 10966
},
{
"epoch": 0.69,
"grad_norm": 0.870911717414856,
"learning_rate": 2.250042694154345e-06,
"loss": 0.5453,
"step": 10967
},
{
"epoch": 0.69,
"grad_norm": 0.8412066698074341,
"learning_rate": 2.2491858726278704e-06,
"loss": 0.5291,
"step": 10968
},
{
"epoch": 0.69,
"grad_norm": 0.8742692470550537,
"learning_rate": 2.248329166928609e-06,
"loss": 0.6004,
"step": 10969
},
{
"epoch": 0.7,
"grad_norm": 0.9923862814903259,
"learning_rate": 2.2474725770926337e-06,
"loss": 0.552,
"step": 10970
},
{
"epoch": 0.7,
"grad_norm": 0.9316403865814209,
"learning_rate": 2.2466161031560136e-06,
"loss": 0.6006,
"step": 10971
},
{
"epoch": 0.7,
"grad_norm": 0.8640490174293518,
"learning_rate": 2.2457597451548102e-06,
"loss": 0.521,
"step": 10972
},
{
"epoch": 0.7,
"grad_norm": 0.8956085443496704,
"learning_rate": 2.2449035031250847e-06,
"loss": 0.5854,
"step": 10973
},
{
"epoch": 0.7,
"grad_norm": 0.8568456768989563,
"learning_rate": 2.2440473771028855e-06,
"loss": 0.5973,
"step": 10974
},
{
"epoch": 0.7,
"grad_norm": 0.8945633769035339,
"learning_rate": 2.2431913671242666e-06,
"loss": 0.6168,
"step": 10975
},
{
"epoch": 0.7,
"grad_norm": 0.8528943657875061,
"learning_rate": 2.242335473225268e-06,
"loss": 0.5569,
"step": 10976
},
{
"epoch": 0.7,
"grad_norm": 0.8728605508804321,
"learning_rate": 2.2414796954419286e-06,
"loss": 0.5558,
"step": 10977
},
{
"epoch": 0.7,
"grad_norm": 0.8296922445297241,
"learning_rate": 2.2406240338102836e-06,
"loss": 0.5519,
"step": 10978
},
{
"epoch": 0.7,
"grad_norm": 0.9309175610542297,
"learning_rate": 2.239768488366361e-06,
"loss": 0.6021,
"step": 10979
},
{
"epoch": 0.7,
"grad_norm": 0.8594921827316284,
"learning_rate": 2.2389130591461855e-06,
"loss": 0.5878,
"step": 10980
},
{
"epoch": 0.7,
"grad_norm": 0.9349560737609863,
"learning_rate": 2.2380577461857777e-06,
"loss": 0.5937,
"step": 10981
},
{
"epoch": 0.7,
"grad_norm": 0.8946079611778259,
"learning_rate": 2.2372025495211465e-06,
"loss": 0.6016,
"step": 10982
},
{
"epoch": 0.7,
"grad_norm": 0.8826418519020081,
"learning_rate": 2.236347469188308e-06,
"loss": 0.6071,
"step": 10983
},
{
"epoch": 0.7,
"grad_norm": 0.9132988452911377,
"learning_rate": 2.2354925052232625e-06,
"loss": 0.5728,
"step": 10984
},
{
"epoch": 0.7,
"grad_norm": 0.8709650039672852,
"learning_rate": 2.2346376576620103e-06,
"loss": 0.5415,
"step": 10985
},
{
"epoch": 0.7,
"grad_norm": 0.982613205909729,
"learning_rate": 2.2337829265405466e-06,
"loss": 0.5818,
"step": 10986
},
{
"epoch": 0.7,
"grad_norm": 0.9030888676643372,
"learning_rate": 2.2329283118948604e-06,
"loss": 0.5771,
"step": 10987
},
{
"epoch": 0.7,
"grad_norm": 0.8313351273536682,
"learning_rate": 2.232073813760937e-06,
"loss": 0.6115,
"step": 10988
},
{
"epoch": 0.7,
"grad_norm": 0.8704630136489868,
"learning_rate": 2.2312194321747582e-06,
"loss": 0.5722,
"step": 10989
},
{
"epoch": 0.7,
"grad_norm": 0.9125388264656067,
"learning_rate": 2.230365167172296e-06,
"loss": 0.5761,
"step": 10990
},
{
"epoch": 0.7,
"grad_norm": 0.9968715906143188,
"learning_rate": 2.2295110187895215e-06,
"loss": 0.5553,
"step": 10991
},
{
"epoch": 0.7,
"grad_norm": 0.8663219809532166,
"learning_rate": 2.2286569870624e-06,
"loss": 0.5965,
"step": 10992
},
{
"epoch": 0.7,
"grad_norm": 0.899998664855957,
"learning_rate": 2.227803072026892e-06,
"loss": 0.5645,
"step": 10993
},
{
"epoch": 0.7,
"grad_norm": 0.9184356927871704,
"learning_rate": 2.226949273718953e-06,
"loss": 0.6109,
"step": 10994
},
{
"epoch": 0.7,
"grad_norm": 0.9209024906158447,
"learning_rate": 2.226095592174533e-06,
"loss": 0.6007,
"step": 10995
},
{
"epoch": 0.7,
"grad_norm": 0.812882125377655,
"learning_rate": 2.2252420274295782e-06,
"loss": 0.5157,
"step": 10996
},
{
"epoch": 0.7,
"grad_norm": 0.8180590271949768,
"learning_rate": 2.224388579520031e-06,
"loss": 0.5697,
"step": 10997
},
{
"epoch": 0.7,
"grad_norm": 0.9130131602287292,
"learning_rate": 2.2235352484818228e-06,
"loss": 0.5966,
"step": 10998
},
{
"epoch": 0.7,
"grad_norm": 0.8782884478569031,
"learning_rate": 2.222682034350887e-06,
"loss": 0.5733,
"step": 10999
},
{
"epoch": 0.7,
"grad_norm": 0.912164032459259,
"learning_rate": 2.221828937163149e-06,
"loss": 0.5988,
"step": 11000
},
{
"epoch": 0.7,
"grad_norm": 0.8906491994857788,
"learning_rate": 2.22097595695453e-06,
"loss": 0.5479,
"step": 11001
},
{
"epoch": 0.7,
"grad_norm": 0.8602820634841919,
"learning_rate": 2.220123093760946e-06,
"loss": 0.5901,
"step": 11002
},
{
"epoch": 0.7,
"grad_norm": 0.9242262244224548,
"learning_rate": 2.2192703476183093e-06,
"loss": 0.5587,
"step": 11003
},
{
"epoch": 0.7,
"grad_norm": 0.919808566570282,
"learning_rate": 2.2184177185625217e-06,
"loss": 0.6188,
"step": 11004
},
{
"epoch": 0.7,
"grad_norm": 0.8946382999420166,
"learning_rate": 2.217565206629491e-06,
"loss": 0.5634,
"step": 11005
},
{
"epoch": 0.7,
"grad_norm": 0.8390125632286072,
"learning_rate": 2.2167128118551084e-06,
"loss": 0.5738,
"step": 11006
},
{
"epoch": 0.7,
"grad_norm": 0.8909174203872681,
"learning_rate": 2.2158605342752667e-06,
"loss": 0.6098,
"step": 11007
},
{
"epoch": 0.7,
"grad_norm": 0.851243257522583,
"learning_rate": 2.2150083739258525e-06,
"loss": 0.5296,
"step": 11008
},
{
"epoch": 0.7,
"grad_norm": 0.866870641708374,
"learning_rate": 2.214156330842748e-06,
"loss": 0.5695,
"step": 11009
},
{
"epoch": 0.7,
"grad_norm": 0.8538164496421814,
"learning_rate": 2.2133044050618286e-06,
"loss": 0.5897,
"step": 11010
},
{
"epoch": 0.7,
"grad_norm": 0.8874875903129578,
"learning_rate": 2.2124525966189685e-06,
"loss": 0.5415,
"step": 11011
},
{
"epoch": 0.7,
"grad_norm": 0.9058599472045898,
"learning_rate": 2.211600905550029e-06,
"loss": 0.5472,
"step": 11012
},
{
"epoch": 0.7,
"grad_norm": 0.8872851729393005,
"learning_rate": 2.2107493318908785e-06,
"loss": 0.5927,
"step": 11013
},
{
"epoch": 0.7,
"grad_norm": 0.8519189953804016,
"learning_rate": 2.2098978756773687e-06,
"loss": 0.5767,
"step": 11014
},
{
"epoch": 0.7,
"grad_norm": 0.9283721446990967,
"learning_rate": 2.2090465369453533e-06,
"loss": 0.6112,
"step": 11015
},
{
"epoch": 0.7,
"grad_norm": 0.863922655582428,
"learning_rate": 2.208195315730681e-06,
"loss": 0.5684,
"step": 11016
},
{
"epoch": 0.7,
"grad_norm": 0.9413056969642639,
"learning_rate": 2.207344212069189e-06,
"loss": 0.6224,
"step": 11017
},
{
"epoch": 0.7,
"grad_norm": 0.92134690284729,
"learning_rate": 2.2064932259967188e-06,
"loss": 0.6007,
"step": 11018
},
{
"epoch": 0.7,
"grad_norm": 0.8620043396949768,
"learning_rate": 2.2056423575491026e-06,
"loss": 0.6119,
"step": 11019
},
{
"epoch": 0.7,
"grad_norm": 0.9397025108337402,
"learning_rate": 2.204791606762164e-06,
"loss": 0.6156,
"step": 11020
},
{
"epoch": 0.7,
"grad_norm": 0.8872060179710388,
"learning_rate": 2.2039409736717273e-06,
"loss": 0.5733,
"step": 11021
},
{
"epoch": 0.7,
"grad_norm": 0.8693039417266846,
"learning_rate": 2.2030904583136085e-06,
"loss": 0.587,
"step": 11022
},
{
"epoch": 0.7,
"grad_norm": 0.8837648034095764,
"learning_rate": 2.2022400607236214e-06,
"loss": 0.6031,
"step": 11023
},
{
"epoch": 0.7,
"grad_norm": 0.848081648349762,
"learning_rate": 2.2013897809375753e-06,
"loss": 0.6094,
"step": 11024
},
{
"epoch": 0.7,
"grad_norm": 0.945680558681488,
"learning_rate": 2.2005396189912647e-06,
"loss": 0.5531,
"step": 11025
},
{
"epoch": 0.7,
"grad_norm": 0.8282198309898376,
"learning_rate": 2.199689574920495e-06,
"loss": 0.5546,
"step": 11026
},
{
"epoch": 0.7,
"grad_norm": 0.8442526459693909,
"learning_rate": 2.198839648761057e-06,
"loss": 0.5722,
"step": 11027
},
{
"epoch": 0.7,
"grad_norm": 0.8687816858291626,
"learning_rate": 2.1979898405487354e-06,
"loss": 0.5748,
"step": 11028
},
{
"epoch": 0.7,
"grad_norm": 0.8658022284507751,
"learning_rate": 2.197140150319314e-06,
"loss": 0.5626,
"step": 11029
},
{
"epoch": 0.7,
"grad_norm": 0.905732274055481,
"learning_rate": 2.19629057810857e-06,
"loss": 0.6178,
"step": 11030
},
{
"epoch": 0.7,
"grad_norm": 0.8981587290763855,
"learning_rate": 2.195441123952277e-06,
"loss": 0.5984,
"step": 11031
},
{
"epoch": 0.7,
"grad_norm": 0.91184002161026,
"learning_rate": 2.1945917878862037e-06,
"loss": 0.5684,
"step": 11032
},
{
"epoch": 0.7,
"grad_norm": 0.8562777042388916,
"learning_rate": 2.193742569946109e-06,
"loss": 0.5981,
"step": 11033
},
{
"epoch": 0.7,
"grad_norm": 0.842792272567749,
"learning_rate": 2.1928934701677507e-06,
"loss": 0.5707,
"step": 11034
},
{
"epoch": 0.7,
"grad_norm": 0.8843762278556824,
"learning_rate": 2.1920444885868862e-06,
"loss": 0.5663,
"step": 11035
},
{
"epoch": 0.7,
"grad_norm": 0.8537455201148987,
"learning_rate": 2.1911956252392593e-06,
"loss": 0.5627,
"step": 11036
},
{
"epoch": 0.7,
"grad_norm": 0.9044625163078308,
"learning_rate": 2.1903468801606125e-06,
"loss": 0.5699,
"step": 11037
},
{
"epoch": 0.7,
"grad_norm": 0.8944157958030701,
"learning_rate": 2.1894982533866852e-06,
"loss": 0.5917,
"step": 11038
},
{
"epoch": 0.7,
"grad_norm": 0.8653507232666016,
"learning_rate": 2.188649744953209e-06,
"loss": 0.5735,
"step": 11039
},
{
"epoch": 0.7,
"grad_norm": 0.9325670003890991,
"learning_rate": 2.1878013548959145e-06,
"loss": 0.5949,
"step": 11040
},
{
"epoch": 0.7,
"grad_norm": 0.9095918536186218,
"learning_rate": 2.186953083250519e-06,
"loss": 0.632,
"step": 11041
},
{
"epoch": 0.7,
"grad_norm": 0.8838375806808472,
"learning_rate": 2.1861049300527426e-06,
"loss": 0.5716,
"step": 11042
},
{
"epoch": 0.7,
"grad_norm": 0.9739626049995422,
"learning_rate": 2.1852568953383025e-06,
"loss": 0.5902,
"step": 11043
},
{
"epoch": 0.7,
"grad_norm": 0.8509230017662048,
"learning_rate": 2.1844089791429002e-06,
"loss": 0.5383,
"step": 11044
},
{
"epoch": 0.7,
"grad_norm": 0.8938042521476746,
"learning_rate": 2.1835611815022412e-06,
"loss": 0.6017,
"step": 11045
},
{
"epoch": 0.7,
"grad_norm": 0.8988103866577148,
"learning_rate": 2.182713502452025e-06,
"loss": 0.5487,
"step": 11046
},
{
"epoch": 0.7,
"grad_norm": 0.8871778845787048,
"learning_rate": 2.181865942027939e-06,
"loss": 0.5941,
"step": 11047
},
{
"epoch": 0.7,
"grad_norm": 0.9358175992965698,
"learning_rate": 2.181018500265679e-06,
"loss": 0.6045,
"step": 11048
},
{
"epoch": 0.7,
"grad_norm": 0.8977616429328918,
"learning_rate": 2.1801711772009203e-06,
"loss": 0.5579,
"step": 11049
},
{
"epoch": 0.7,
"grad_norm": 0.8415703177452087,
"learning_rate": 2.179323972869345e-06,
"loss": 0.6,
"step": 11050
},
{
"epoch": 0.7,
"grad_norm": 0.8636377453804016,
"learning_rate": 2.1784768873066243e-06,
"loss": 0.5714,
"step": 11051
},
{
"epoch": 0.7,
"grad_norm": 0.9031801223754883,
"learning_rate": 2.1776299205484265e-06,
"loss": 0.5426,
"step": 11052
},
{
"epoch": 0.7,
"grad_norm": 0.9137712121009827,
"learning_rate": 2.176783072630414e-06,
"loss": 0.569,
"step": 11053
},
{
"epoch": 0.7,
"grad_norm": 0.8924576044082642,
"learning_rate": 2.1759363435882475e-06,
"loss": 0.5756,
"step": 11054
},
{
"epoch": 0.7,
"grad_norm": 0.8783155083656311,
"learning_rate": 2.1750897334575736e-06,
"loss": 0.5775,
"step": 11055
},
{
"epoch": 0.7,
"grad_norm": 0.8635226488113403,
"learning_rate": 2.174243242274047e-06,
"loss": 0.5589,
"step": 11056
},
{
"epoch": 0.7,
"grad_norm": 0.9452078938484192,
"learning_rate": 2.1733968700733066e-06,
"loss": 0.6036,
"step": 11057
},
{
"epoch": 0.7,
"grad_norm": 0.9600135684013367,
"learning_rate": 2.1725506168909903e-06,
"loss": 0.6068,
"step": 11058
},
{
"epoch": 0.7,
"grad_norm": 0.8594204187393188,
"learning_rate": 2.1717044827627314e-06,
"loss": 0.5309,
"step": 11059
},
{
"epoch": 0.7,
"grad_norm": 0.9110593199729919,
"learning_rate": 2.1708584677241586e-06,
"loss": 0.6443,
"step": 11060
},
{
"epoch": 0.7,
"grad_norm": 0.8541653156280518,
"learning_rate": 2.170012571810893e-06,
"loss": 0.532,
"step": 11061
},
{
"epoch": 0.7,
"grad_norm": 0.8981989622116089,
"learning_rate": 2.1691667950585552e-06,
"loss": 0.5661,
"step": 11062
},
{
"epoch": 0.7,
"grad_norm": 0.8796327710151672,
"learning_rate": 2.1683211375027543e-06,
"loss": 0.6167,
"step": 11063
},
{
"epoch": 0.7,
"grad_norm": 0.8893603086471558,
"learning_rate": 2.1674755991790976e-06,
"loss": 0.6118,
"step": 11064
},
{
"epoch": 0.7,
"grad_norm": 0.8742503523826599,
"learning_rate": 2.1666301801231937e-06,
"loss": 0.5821,
"step": 11065
},
{
"epoch": 0.7,
"grad_norm": 0.9033846259117126,
"learning_rate": 2.1657848803706344e-06,
"loss": 0.589,
"step": 11066
},
{
"epoch": 0.7,
"grad_norm": 0.8721830248832703,
"learning_rate": 2.1649396999570137e-06,
"loss": 0.5571,
"step": 11067
},
{
"epoch": 0.7,
"grad_norm": 0.8391421437263489,
"learning_rate": 2.1640946389179207e-06,
"loss": 0.5393,
"step": 11068
},
{
"epoch": 0.7,
"grad_norm": 0.8839281797409058,
"learning_rate": 2.1632496972889366e-06,
"loss": 0.5608,
"step": 11069
},
{
"epoch": 0.7,
"grad_norm": 0.9285804033279419,
"learning_rate": 2.162404875105641e-06,
"loss": 0.6456,
"step": 11070
},
{
"epoch": 0.7,
"grad_norm": 0.908049464225769,
"learning_rate": 2.1615601724036033e-06,
"loss": 0.5772,
"step": 11071
},
{
"epoch": 0.7,
"grad_norm": 0.8621760010719299,
"learning_rate": 2.1607155892183905e-06,
"loss": 0.566,
"step": 11072
},
{
"epoch": 0.7,
"grad_norm": 0.8190118670463562,
"learning_rate": 2.1598711255855713e-06,
"loss": 0.5507,
"step": 11073
},
{
"epoch": 0.7,
"grad_norm": 0.9111891984939575,
"learning_rate": 2.1590267815406968e-06,
"loss": 0.5911,
"step": 11074
},
{
"epoch": 0.7,
"grad_norm": 0.9198263883590698,
"learning_rate": 2.1581825571193216e-06,
"loss": 0.5695,
"step": 11075
},
{
"epoch": 0.7,
"grad_norm": 0.863237202167511,
"learning_rate": 2.1573384523569945e-06,
"loss": 0.5976,
"step": 11076
},
{
"epoch": 0.7,
"grad_norm": 0.914863109588623,
"learning_rate": 2.1564944672892524e-06,
"loss": 0.5644,
"step": 11077
},
{
"epoch": 0.7,
"grad_norm": 0.8742169141769409,
"learning_rate": 2.1556506019516405e-06,
"loss": 0.5602,
"step": 11078
},
{
"epoch": 0.7,
"grad_norm": 0.862916886806488,
"learning_rate": 2.1548068563796855e-06,
"loss": 0.5522,
"step": 11079
},
{
"epoch": 0.7,
"grad_norm": 0.8305811882019043,
"learning_rate": 2.1539632306089153e-06,
"loss": 0.543,
"step": 11080
},
{
"epoch": 0.7,
"grad_norm": 0.8726207613945007,
"learning_rate": 2.153119724674853e-06,
"loss": 0.5805,
"step": 11081
},
{
"epoch": 0.7,
"grad_norm": 0.9057608246803284,
"learning_rate": 2.1522763386130156e-06,
"loss": 0.5893,
"step": 11082
},
{
"epoch": 0.7,
"grad_norm": 0.8841626644134521,
"learning_rate": 2.1514330724589156e-06,
"loss": 0.5744,
"step": 11083
},
{
"epoch": 0.7,
"grad_norm": 0.9211553931236267,
"learning_rate": 2.1505899262480607e-06,
"loss": 0.6397,
"step": 11084
},
{
"epoch": 0.7,
"grad_norm": 1.0019750595092773,
"learning_rate": 2.149746900015948e-06,
"loss": 0.6107,
"step": 11085
},
{
"epoch": 0.7,
"grad_norm": 0.8821682929992676,
"learning_rate": 2.148903993798082e-06,
"loss": 0.5628,
"step": 11086
},
{
"epoch": 0.7,
"grad_norm": 0.874483048915863,
"learning_rate": 2.148061207629949e-06,
"loss": 0.6007,
"step": 11087
},
{
"epoch": 0.7,
"grad_norm": 0.8956743478775024,
"learning_rate": 2.1472185415470365e-06,
"loss": 0.5535,
"step": 11088
},
{
"epoch": 0.7,
"grad_norm": 0.9166892766952515,
"learning_rate": 2.1463759955848277e-06,
"loss": 0.551,
"step": 11089
},
{
"epoch": 0.7,
"grad_norm": 0.8699899911880493,
"learning_rate": 2.1455335697787987e-06,
"loss": 0.6144,
"step": 11090
},
{
"epoch": 0.7,
"grad_norm": 0.8954592347145081,
"learning_rate": 2.1446912641644206e-06,
"loss": 0.5944,
"step": 11091
},
{
"epoch": 0.7,
"grad_norm": 0.8515070080757141,
"learning_rate": 2.1438490787771634e-06,
"loss": 0.5716,
"step": 11092
},
{
"epoch": 0.7,
"grad_norm": 0.8956817388534546,
"learning_rate": 2.1430070136524826e-06,
"loss": 0.555,
"step": 11093
},
{
"epoch": 0.7,
"grad_norm": 0.9041964411735535,
"learning_rate": 2.1421650688258384e-06,
"loss": 0.5484,
"step": 11094
},
{
"epoch": 0.7,
"grad_norm": 0.8969672322273254,
"learning_rate": 2.1413232443326813e-06,
"loss": 0.6186,
"step": 11095
},
{
"epoch": 0.7,
"grad_norm": 0.9423562288284302,
"learning_rate": 2.140481540208458e-06,
"loss": 0.6008,
"step": 11096
},
{
"epoch": 0.7,
"grad_norm": 0.8758630156517029,
"learning_rate": 2.1396399564886113e-06,
"loss": 0.5685,
"step": 11097
},
{
"epoch": 0.7,
"grad_norm": 0.8483191728591919,
"learning_rate": 2.1387984932085714e-06,
"loss": 0.5524,
"step": 11098
},
{
"epoch": 0.7,
"grad_norm": 0.9557432532310486,
"learning_rate": 2.1379571504037754e-06,
"loss": 0.6017,
"step": 11099
},
{
"epoch": 0.7,
"grad_norm": 0.9311283230781555,
"learning_rate": 2.1371159281096497e-06,
"loss": 0.5575,
"step": 11100
},
{
"epoch": 0.7,
"grad_norm": 0.8768463134765625,
"learning_rate": 2.1362748263616112e-06,
"loss": 0.6389,
"step": 11101
},
{
"epoch": 0.7,
"grad_norm": 0.926892876625061,
"learning_rate": 2.1354338451950774e-06,
"loss": 0.5177,
"step": 11102
},
{
"epoch": 0.7,
"grad_norm": 0.8700109124183655,
"learning_rate": 2.1345929846454593e-06,
"loss": 0.5717,
"step": 11103
},
{
"epoch": 0.7,
"grad_norm": 0.9436931014060974,
"learning_rate": 2.133752244748163e-06,
"loss": 0.5981,
"step": 11104
},
{
"epoch": 0.7,
"grad_norm": 0.8545697927474976,
"learning_rate": 2.1329116255385902e-06,
"loss": 0.559,
"step": 11105
},
{
"epoch": 0.7,
"grad_norm": 1.0063272714614868,
"learning_rate": 2.132071127052131e-06,
"loss": 0.6422,
"step": 11106
},
{
"epoch": 0.7,
"grad_norm": 0.8825348019599915,
"learning_rate": 2.1312307493241825e-06,
"loss": 0.5503,
"step": 11107
},
{
"epoch": 0.7,
"grad_norm": 0.837097704410553,
"learning_rate": 2.1303904923901288e-06,
"loss": 0.5861,
"step": 11108
},
{
"epoch": 0.7,
"grad_norm": 0.8669401407241821,
"learning_rate": 2.1295503562853466e-06,
"loss": 0.5819,
"step": 11109
},
{
"epoch": 0.7,
"grad_norm": 0.9110631346702576,
"learning_rate": 2.1287103410452135e-06,
"loss": 0.5713,
"step": 11110
},
{
"epoch": 0.7,
"grad_norm": 0.8955477476119995,
"learning_rate": 2.1278704467050996e-06,
"loss": 0.6162,
"step": 11111
},
{
"epoch": 0.7,
"grad_norm": 0.8396604061126709,
"learning_rate": 2.1270306733003697e-06,
"loss": 0.5518,
"step": 11112
},
{
"epoch": 0.7,
"grad_norm": 0.9126778841018677,
"learning_rate": 2.126191020866386e-06,
"loss": 0.6153,
"step": 11113
},
{
"epoch": 0.7,
"grad_norm": 0.9138805866241455,
"learning_rate": 2.125351489438499e-06,
"loss": 0.5754,
"step": 11114
},
{
"epoch": 0.7,
"grad_norm": 0.8429421782493591,
"learning_rate": 2.124512079052059e-06,
"loss": 0.5369,
"step": 11115
},
{
"epoch": 0.7,
"grad_norm": 0.8413889408111572,
"learning_rate": 2.123672789742416e-06,
"loss": 0.5845,
"step": 11116
},
{
"epoch": 0.7,
"grad_norm": 0.9169177412986755,
"learning_rate": 2.1228336215449036e-06,
"loss": 0.5887,
"step": 11117
},
{
"epoch": 0.7,
"grad_norm": 0.8493983745574951,
"learning_rate": 2.1219945744948584e-06,
"loss": 0.5727,
"step": 11118
},
{
"epoch": 0.7,
"grad_norm": 0.8498938083648682,
"learning_rate": 2.12115564862761e-06,
"loss": 0.5381,
"step": 11119
},
{
"epoch": 0.7,
"grad_norm": 0.8709191083908081,
"learning_rate": 2.1203168439784828e-06,
"loss": 0.5768,
"step": 11120
},
{
"epoch": 0.7,
"grad_norm": 0.8937205076217651,
"learning_rate": 2.119478160582797e-06,
"loss": 0.6019,
"step": 11121
},
{
"epoch": 0.7,
"grad_norm": 0.8882603645324707,
"learning_rate": 2.1186395984758633e-06,
"loss": 0.5199,
"step": 11122
},
{
"epoch": 0.7,
"grad_norm": 0.8753896951675415,
"learning_rate": 2.117801157692993e-06,
"loss": 0.5835,
"step": 11123
},
{
"epoch": 0.7,
"grad_norm": 0.92037034034729,
"learning_rate": 2.1169628382694894e-06,
"loss": 0.5877,
"step": 11124
},
{
"epoch": 0.7,
"grad_norm": 0.8579007983207703,
"learning_rate": 2.1161246402406518e-06,
"loss": 0.5617,
"step": 11125
},
{
"epoch": 0.7,
"grad_norm": 0.8872489929199219,
"learning_rate": 2.1152865636417723e-06,
"loss": 0.5598,
"step": 11126
},
{
"epoch": 0.7,
"grad_norm": 0.9075922966003418,
"learning_rate": 2.114448608508143e-06,
"loss": 0.5544,
"step": 11127
},
{
"epoch": 0.71,
"grad_norm": 0.8997741937637329,
"learning_rate": 2.113610774875041e-06,
"loss": 0.5516,
"step": 11128
},
{
"epoch": 0.71,
"grad_norm": 0.8664461970329285,
"learning_rate": 2.1127730627777497e-06,
"loss": 0.5554,
"step": 11129
},
{
"epoch": 0.71,
"grad_norm": 0.9150891900062561,
"learning_rate": 2.111935472251543e-06,
"loss": 0.5794,
"step": 11130
},
{
"epoch": 0.71,
"grad_norm": 0.9180150628089905,
"learning_rate": 2.1110980033316846e-06,
"loss": 0.5755,
"step": 11131
},
{
"epoch": 0.71,
"grad_norm": 0.9424551129341125,
"learning_rate": 2.1102606560534393e-06,
"loss": 0.6034,
"step": 11132
},
{
"epoch": 0.71,
"grad_norm": 0.893530011177063,
"learning_rate": 2.1094234304520655e-06,
"loss": 0.6,
"step": 11133
},
{
"epoch": 0.71,
"grad_norm": 0.9302678108215332,
"learning_rate": 2.108586326562816e-06,
"loss": 0.6037,
"step": 11134
},
{
"epoch": 0.71,
"grad_norm": 0.9196210503578186,
"learning_rate": 2.1077493444209385e-06,
"loss": 0.6183,
"step": 11135
},
{
"epoch": 0.71,
"grad_norm": 0.9748576879501343,
"learning_rate": 2.1069124840616717e-06,
"loss": 0.5905,
"step": 11136
},
{
"epoch": 0.71,
"grad_norm": 0.8754902482032776,
"learning_rate": 2.1060757455202574e-06,
"loss": 0.5659,
"step": 11137
},
{
"epoch": 0.71,
"grad_norm": 0.9038988947868347,
"learning_rate": 2.1052391288319285e-06,
"loss": 0.6012,
"step": 11138
},
{
"epoch": 0.71,
"grad_norm": 0.9287976026535034,
"learning_rate": 2.1044026340319075e-06,
"loss": 0.5397,
"step": 11139
},
{
"epoch": 0.71,
"grad_norm": 0.9258267879486084,
"learning_rate": 2.1035662611554187e-06,
"loss": 0.586,
"step": 11140
},
{
"epoch": 0.71,
"grad_norm": 0.9034359455108643,
"learning_rate": 2.1027300102376787e-06,
"loss": 0.5757,
"step": 11141
},
{
"epoch": 0.71,
"grad_norm": 0.8997130393981934,
"learning_rate": 2.101893881313899e-06,
"loss": 0.5749,
"step": 11142
},
{
"epoch": 0.71,
"grad_norm": 0.8986077308654785,
"learning_rate": 2.1010578744192885e-06,
"loss": 0.6169,
"step": 11143
},
{
"epoch": 0.71,
"grad_norm": 0.9187172055244446,
"learning_rate": 2.1002219895890435e-06,
"loss": 0.6212,
"step": 11144
},
{
"epoch": 0.71,
"grad_norm": 0.8708627223968506,
"learning_rate": 2.099386226858362e-06,
"loss": 0.5887,
"step": 11145
},
{
"epoch": 0.71,
"grad_norm": 0.8534128665924072,
"learning_rate": 2.098550586262439e-06,
"loss": 0.597,
"step": 11146
},
{
"epoch": 0.71,
"grad_norm": 0.8883056044578552,
"learning_rate": 2.097715067836456e-06,
"loss": 0.5597,
"step": 11147
},
{
"epoch": 0.71,
"grad_norm": 0.9250147938728333,
"learning_rate": 2.096879671615595e-06,
"loss": 0.6114,
"step": 11148
},
{
"epoch": 0.71,
"grad_norm": 0.9054756164550781,
"learning_rate": 2.0960443976350315e-06,
"loss": 0.6134,
"step": 11149
},
{
"epoch": 0.71,
"grad_norm": 0.9181949496269226,
"learning_rate": 2.0952092459299366e-06,
"loss": 0.6393,
"step": 11150
},
{
"epoch": 0.71,
"grad_norm": 0.8578399419784546,
"learning_rate": 2.0943742165354776e-06,
"loss": 0.5453,
"step": 11151
},
{
"epoch": 0.71,
"grad_norm": 0.8718449473381042,
"learning_rate": 2.0935393094868094e-06,
"loss": 0.5702,
"step": 11152
},
{
"epoch": 0.71,
"grad_norm": 0.8640325665473938,
"learning_rate": 2.092704524819089e-06,
"loss": 0.529,
"step": 11153
},
{
"epoch": 0.71,
"grad_norm": 0.878528892993927,
"learning_rate": 2.091869862567471e-06,
"loss": 0.6061,
"step": 11154
},
{
"epoch": 0.71,
"grad_norm": 0.9092130064964294,
"learning_rate": 2.091035322767095e-06,
"loss": 0.5999,
"step": 11155
},
{
"epoch": 0.71,
"grad_norm": 0.8933854699134827,
"learning_rate": 2.0902009054531013e-06,
"loss": 0.6148,
"step": 11156
},
{
"epoch": 0.71,
"grad_norm": 0.9362192153930664,
"learning_rate": 2.089366610660627e-06,
"loss": 0.6049,
"step": 11157
},
{
"epoch": 0.71,
"grad_norm": 0.8995941281318665,
"learning_rate": 2.0885324384247956e-06,
"loss": 0.5718,
"step": 11158
},
{
"epoch": 0.71,
"grad_norm": 0.8591166734695435,
"learning_rate": 2.087698388780739e-06,
"loss": 0.5622,
"step": 11159
},
{
"epoch": 0.71,
"grad_norm": 0.8993247151374817,
"learning_rate": 2.0868644617635697e-06,
"loss": 0.5731,
"step": 11160
},
{
"epoch": 0.71,
"grad_norm": 0.9018330574035645,
"learning_rate": 2.0860306574084043e-06,
"loss": 0.5593,
"step": 11161
},
{
"epoch": 0.71,
"grad_norm": 0.889884889125824,
"learning_rate": 2.085196975750351e-06,
"loss": 0.5715,
"step": 11162
},
{
"epoch": 0.71,
"grad_norm": 0.8837161660194397,
"learning_rate": 2.084363416824513e-06,
"loss": 0.5673,
"step": 11163
},
{
"epoch": 0.71,
"grad_norm": 0.825923502445221,
"learning_rate": 2.0835299806659885e-06,
"loss": 0.5608,
"step": 11164
},
{
"epoch": 0.71,
"grad_norm": 0.9410537481307983,
"learning_rate": 2.0826966673098737e-06,
"loss": 0.6276,
"step": 11165
},
{
"epoch": 0.71,
"grad_norm": 0.8919404149055481,
"learning_rate": 2.0818634767912495e-06,
"loss": 0.6048,
"step": 11166
},
{
"epoch": 0.71,
"grad_norm": 0.9027935266494751,
"learning_rate": 2.081030409145206e-06,
"loss": 0.5912,
"step": 11167
},
{
"epoch": 0.71,
"grad_norm": 0.9064032435417175,
"learning_rate": 2.080197464406816e-06,
"loss": 0.6004,
"step": 11168
},
{
"epoch": 0.71,
"grad_norm": 0.8723354935646057,
"learning_rate": 2.0793646426111536e-06,
"loss": 0.5449,
"step": 11169
},
{
"epoch": 0.71,
"grad_norm": 0.9101514220237732,
"learning_rate": 2.078531943793288e-06,
"loss": 0.605,
"step": 11170
},
{
"epoch": 0.71,
"grad_norm": 0.9378718137741089,
"learning_rate": 2.0776993679882752e-06,
"loss": 0.5605,
"step": 11171
},
{
"epoch": 0.71,
"grad_norm": 0.8340771198272705,
"learning_rate": 2.076866915231178e-06,
"loss": 0.5802,
"step": 11172
},
{
"epoch": 0.71,
"grad_norm": 0.9529073238372803,
"learning_rate": 2.076034585557048e-06,
"loss": 0.5803,
"step": 11173
},
{
"epoch": 0.71,
"grad_norm": 0.9297928214073181,
"learning_rate": 2.075202379000928e-06,
"loss": 0.5573,
"step": 11174
},
{
"epoch": 0.71,
"grad_norm": 0.8736124038696289,
"learning_rate": 2.074370295597861e-06,
"loss": 0.5904,
"step": 11175
},
{
"epoch": 0.71,
"grad_norm": 0.947452962398529,
"learning_rate": 2.0735383353828843e-06,
"loss": 0.6098,
"step": 11176
},
{
"epoch": 0.71,
"grad_norm": 0.8607105612754822,
"learning_rate": 2.0727064983910266e-06,
"loss": 0.5983,
"step": 11177
},
{
"epoch": 0.71,
"grad_norm": 0.9094382524490356,
"learning_rate": 2.071874784657318e-06,
"loss": 0.6039,
"step": 11178
},
{
"epoch": 0.71,
"grad_norm": 0.868175745010376,
"learning_rate": 2.0710431942167713e-06,
"loss": 0.5535,
"step": 11179
},
{
"epoch": 0.71,
"grad_norm": 0.9550389647483826,
"learning_rate": 2.070211727104409e-06,
"loss": 0.5303,
"step": 11180
},
{
"epoch": 0.71,
"grad_norm": 0.939507246017456,
"learning_rate": 2.0693803833552407e-06,
"loss": 0.5674,
"step": 11181
},
{
"epoch": 0.71,
"grad_norm": 0.8654747009277344,
"learning_rate": 2.0685491630042677e-06,
"loss": 0.5973,
"step": 11182
},
{
"epoch": 0.71,
"grad_norm": 0.8690040111541748,
"learning_rate": 2.0677180660864916e-06,
"loss": 0.5844,
"step": 11183
},
{
"epoch": 0.71,
"grad_norm": 0.9559879302978516,
"learning_rate": 2.0668870926369068e-06,
"loss": 0.5674,
"step": 11184
},
{
"epoch": 0.71,
"grad_norm": 0.8681148290634155,
"learning_rate": 2.066056242690503e-06,
"loss": 0.6035,
"step": 11185
},
{
"epoch": 0.71,
"grad_norm": 0.8854528665542603,
"learning_rate": 2.0652255162822665e-06,
"loss": 0.5315,
"step": 11186
},
{
"epoch": 0.71,
"grad_norm": 0.866400957107544,
"learning_rate": 2.0643949134471726e-06,
"loss": 0.5466,
"step": 11187
},
{
"epoch": 0.71,
"grad_norm": 0.8909302949905396,
"learning_rate": 2.0635644342201942e-06,
"loss": 0.5889,
"step": 11188
},
{
"epoch": 0.71,
"grad_norm": 0.884699821472168,
"learning_rate": 2.0627340786363063e-06,
"loss": 0.6031,
"step": 11189
},
{
"epoch": 0.71,
"grad_norm": 0.930429995059967,
"learning_rate": 2.0619038467304663e-06,
"loss": 0.5815,
"step": 11190
},
{
"epoch": 0.71,
"grad_norm": 0.8738210201263428,
"learning_rate": 2.061073738537635e-06,
"loss": 0.57,
"step": 11191
},
{
"epoch": 0.71,
"grad_norm": 0.8566862344741821,
"learning_rate": 2.0602437540927644e-06,
"loss": 0.564,
"step": 11192
},
{
"epoch": 0.71,
"grad_norm": 0.9492089152336121,
"learning_rate": 2.0594138934308027e-06,
"loss": 0.6218,
"step": 11193
},
{
"epoch": 0.71,
"grad_norm": 0.929764986038208,
"learning_rate": 2.058584156586692e-06,
"loss": 0.6217,
"step": 11194
},
{
"epoch": 0.71,
"grad_norm": 0.9115621447563171,
"learning_rate": 2.0577545435953727e-06,
"loss": 0.5181,
"step": 11195
},
{
"epoch": 0.71,
"grad_norm": 0.8706603646278381,
"learning_rate": 2.05692505449177e-06,
"loss": 0.578,
"step": 11196
},
{
"epoch": 0.71,
"grad_norm": 0.958949863910675,
"learning_rate": 2.0560956893108188e-06,
"loss": 0.6523,
"step": 11197
},
{
"epoch": 0.71,
"grad_norm": 0.888208270072937,
"learning_rate": 2.0552664480874353e-06,
"loss": 0.5606,
"step": 11198
},
{
"epoch": 0.71,
"grad_norm": 1.0087759494781494,
"learning_rate": 2.0544373308565374e-06,
"loss": 0.6203,
"step": 11199
},
{
"epoch": 0.71,
"grad_norm": 0.8741673827171326,
"learning_rate": 2.0536083376530368e-06,
"loss": 0.5617,
"step": 11200
},
{
"epoch": 0.71,
"grad_norm": 0.86680668592453,
"learning_rate": 2.0527794685118397e-06,
"loss": 0.5924,
"step": 11201
},
{
"epoch": 0.71,
"grad_norm": 0.8506894111633301,
"learning_rate": 2.0519507234678464e-06,
"loss": 0.6104,
"step": 11202
},
{
"epoch": 0.71,
"grad_norm": 0.8975198864936829,
"learning_rate": 2.051122102555954e-06,
"loss": 0.5931,
"step": 11203
},
{
"epoch": 0.71,
"grad_norm": 0.9341747164726257,
"learning_rate": 2.0502936058110502e-06,
"loss": 0.6311,
"step": 11204
},
{
"epoch": 0.71,
"grad_norm": 0.8767626285552979,
"learning_rate": 2.049465233268021e-06,
"loss": 0.5843,
"step": 11205
},
{
"epoch": 0.71,
"grad_norm": 0.8813466429710388,
"learning_rate": 2.0486369849617467e-06,
"loss": 0.5672,
"step": 11206
},
{
"epoch": 0.71,
"grad_norm": 1.044753909111023,
"learning_rate": 2.0478088609271018e-06,
"loss": 0.5594,
"step": 11207
},
{
"epoch": 0.71,
"grad_norm": 0.9174667000770569,
"learning_rate": 2.0469808611989583e-06,
"loss": 0.6105,
"step": 11208
},
{
"epoch": 0.71,
"grad_norm": 0.9028404951095581,
"learning_rate": 2.0461529858121737e-06,
"loss": 0.5766,
"step": 11209
},
{
"epoch": 0.71,
"grad_norm": 0.9191893339157104,
"learning_rate": 2.0453252348016133e-06,
"loss": 0.592,
"step": 11210
},
{
"epoch": 0.71,
"grad_norm": 0.9252839088439941,
"learning_rate": 2.04449760820213e-06,
"loss": 0.5725,
"step": 11211
},
{
"epoch": 0.71,
"grad_norm": 0.8581644296646118,
"learning_rate": 2.04367010604857e-06,
"loss": 0.5989,
"step": 11212
},
{
"epoch": 0.71,
"grad_norm": 0.8583932518959045,
"learning_rate": 2.042842728375777e-06,
"loss": 0.5921,
"step": 11213
},
{
"epoch": 0.71,
"grad_norm": 0.8592872023582458,
"learning_rate": 2.0420154752185896e-06,
"loss": 0.5688,
"step": 11214
},
{
"epoch": 0.71,
"grad_norm": 0.8804487586021423,
"learning_rate": 2.0411883466118406e-06,
"loss": 0.6106,
"step": 11215
},
{
"epoch": 0.71,
"grad_norm": 0.8709439039230347,
"learning_rate": 2.0403613425903584e-06,
"loss": 0.5829,
"step": 11216
},
{
"epoch": 0.71,
"grad_norm": 0.9017482995986938,
"learning_rate": 2.0395344631889636e-06,
"loss": 0.5525,
"step": 11217
},
{
"epoch": 0.71,
"grad_norm": 0.8507391810417175,
"learning_rate": 2.038707708442471e-06,
"loss": 0.6201,
"step": 11218
},
{
"epoch": 0.71,
"grad_norm": 0.9192302227020264,
"learning_rate": 2.0378810783856996e-06,
"loss": 0.5915,
"step": 11219
},
{
"epoch": 0.71,
"grad_norm": 0.8469076156616211,
"learning_rate": 2.0370545730534493e-06,
"loss": 0.5402,
"step": 11220
},
{
"epoch": 0.71,
"grad_norm": 0.9404274225234985,
"learning_rate": 2.0362281924805238e-06,
"loss": 0.5853,
"step": 11221
},
{
"epoch": 0.71,
"grad_norm": 0.8778280019760132,
"learning_rate": 2.035401936701719e-06,
"loss": 0.5735,
"step": 11222
},
{
"epoch": 0.71,
"grad_norm": 0.8963788151741028,
"learning_rate": 2.034575805751825e-06,
"loss": 0.5796,
"step": 11223
},
{
"epoch": 0.71,
"grad_norm": 0.8657545447349548,
"learning_rate": 2.0337497996656303e-06,
"loss": 0.5865,
"step": 11224
},
{
"epoch": 0.71,
"grad_norm": 0.8633813261985779,
"learning_rate": 2.03292391847791e-06,
"loss": 0.568,
"step": 11225
},
{
"epoch": 0.71,
"grad_norm": 1.0021978616714478,
"learning_rate": 2.032098162223441e-06,
"loss": 0.5911,
"step": 11226
},
{
"epoch": 0.71,
"grad_norm": 0.8717944622039795,
"learning_rate": 2.031272530936997e-06,
"loss": 0.5538,
"step": 11227
},
{
"epoch": 0.71,
"grad_norm": 0.8620628118515015,
"learning_rate": 2.0304470246533377e-06,
"loss": 0.583,
"step": 11228
},
{
"epoch": 0.71,
"grad_norm": 0.8687443733215332,
"learning_rate": 2.0296216434072237e-06,
"loss": 0.5445,
"step": 11229
},
{
"epoch": 0.71,
"grad_norm": 0.9198354482650757,
"learning_rate": 2.0287963872334093e-06,
"loss": 0.5932,
"step": 11230
},
{
"epoch": 0.71,
"grad_norm": 0.83836430311203,
"learning_rate": 2.0279712561666425e-06,
"loss": 0.5579,
"step": 11231
},
{
"epoch": 0.71,
"grad_norm": 0.8580030202865601,
"learning_rate": 2.0271462502416694e-06,
"loss": 0.5878,
"step": 11232
},
{
"epoch": 0.71,
"grad_norm": 0.8928140997886658,
"learning_rate": 2.0263213694932238e-06,
"loss": 0.517,
"step": 11233
},
{
"epoch": 0.71,
"grad_norm": 0.8412920832633972,
"learning_rate": 2.0254966139560404e-06,
"loss": 0.5409,
"step": 11234
},
{
"epoch": 0.71,
"grad_norm": 0.9490091800689697,
"learning_rate": 2.0246719836648476e-06,
"loss": 0.5876,
"step": 11235
},
{
"epoch": 0.71,
"grad_norm": 0.9710505604743958,
"learning_rate": 2.0238474786543673e-06,
"loss": 0.5687,
"step": 11236
},
{
"epoch": 0.71,
"grad_norm": 0.8756168484687805,
"learning_rate": 2.0230230989593157e-06,
"loss": 0.5713,
"step": 11237
},
{
"epoch": 0.71,
"grad_norm": 0.9873241782188416,
"learning_rate": 2.0221988446144076e-06,
"loss": 0.6178,
"step": 11238
},
{
"epoch": 0.71,
"grad_norm": 0.9938790798187256,
"learning_rate": 2.0213747156543432e-06,
"loss": 0.565,
"step": 11239
},
{
"epoch": 0.71,
"grad_norm": 0.8901024460792542,
"learning_rate": 2.0205507121138316e-06,
"loss": 0.565,
"step": 11240
},
{
"epoch": 0.71,
"grad_norm": 0.923038899898529,
"learning_rate": 2.019726834027563e-06,
"loss": 0.5797,
"step": 11241
},
{
"epoch": 0.71,
"grad_norm": 0.925470769405365,
"learning_rate": 2.0189030814302295e-06,
"loss": 0.5736,
"step": 11242
},
{
"epoch": 0.71,
"grad_norm": 0.9232540130615234,
"learning_rate": 2.018079454356517e-06,
"loss": 0.5231,
"step": 11243
},
{
"epoch": 0.71,
"grad_norm": 0.9192769527435303,
"learning_rate": 2.017255952841105e-06,
"loss": 0.593,
"step": 11244
},
{
"epoch": 0.71,
"grad_norm": 0.9208205938339233,
"learning_rate": 2.016432576918669e-06,
"loss": 0.5463,
"step": 11245
},
{
"epoch": 0.71,
"grad_norm": 0.8803871870040894,
"learning_rate": 2.0156093266238795e-06,
"loss": 0.5155,
"step": 11246
},
{
"epoch": 0.71,
"grad_norm": 0.9068865776062012,
"learning_rate": 2.014786201991396e-06,
"loss": 0.613,
"step": 11247
},
{
"epoch": 0.71,
"grad_norm": 0.9430435299873352,
"learning_rate": 2.0139632030558844e-06,
"loss": 0.6024,
"step": 11248
},
{
"epoch": 0.71,
"grad_norm": 0.8339919447898865,
"learning_rate": 2.0131403298519927e-06,
"loss": 0.5304,
"step": 11249
},
{
"epoch": 0.71,
"grad_norm": 0.9149238467216492,
"learning_rate": 2.012317582414371e-06,
"loss": 0.5962,
"step": 11250
},
{
"epoch": 0.71,
"grad_norm": 0.8416147828102112,
"learning_rate": 2.011494960777663e-06,
"loss": 0.5355,
"step": 11251
},
{
"epoch": 0.71,
"grad_norm": 0.8826677203178406,
"learning_rate": 2.0106724649765055e-06,
"loss": 0.6031,
"step": 11252
},
{
"epoch": 0.71,
"grad_norm": 0.9082532525062561,
"learning_rate": 2.0098500950455313e-06,
"loss": 0.6503,
"step": 11253
},
{
"epoch": 0.71,
"grad_norm": 0.924066960811615,
"learning_rate": 2.00902785101937e-06,
"loss": 0.5527,
"step": 11254
},
{
"epoch": 0.71,
"grad_norm": 0.8147642016410828,
"learning_rate": 2.008205732932639e-06,
"loss": 0.5123,
"step": 11255
},
{
"epoch": 0.71,
"grad_norm": 0.8650907278060913,
"learning_rate": 2.0073837408199566e-06,
"loss": 0.5695,
"step": 11256
},
{
"epoch": 0.71,
"grad_norm": 0.9047468900680542,
"learning_rate": 2.0065618747159342e-06,
"loss": 0.556,
"step": 11257
},
{
"epoch": 0.71,
"grad_norm": 0.8059144020080566,
"learning_rate": 2.0057401346551785e-06,
"loss": 0.5433,
"step": 11258
},
{
"epoch": 0.71,
"grad_norm": 0.8873769044876099,
"learning_rate": 2.004918520672289e-06,
"loss": 0.6051,
"step": 11259
},
{
"epoch": 0.71,
"grad_norm": 0.8762856125831604,
"learning_rate": 2.0040970328018618e-06,
"loss": 0.6158,
"step": 11260
},
{
"epoch": 0.71,
"grad_norm": 0.8870139718055725,
"learning_rate": 2.0032756710784864e-06,
"loss": 0.5862,
"step": 11261
},
{
"epoch": 0.71,
"grad_norm": 0.9140628576278687,
"learning_rate": 2.0024544355367494e-06,
"loss": 0.5758,
"step": 11262
},
{
"epoch": 0.71,
"grad_norm": 0.9125930666923523,
"learning_rate": 2.001633326211227e-06,
"loss": 0.5277,
"step": 11263
},
{
"epoch": 0.71,
"grad_norm": 0.8611398339271545,
"learning_rate": 2.000812343136494e-06,
"loss": 0.5982,
"step": 11264
},
{
"epoch": 0.71,
"grad_norm": 0.9212353229522705,
"learning_rate": 1.99999148634712e-06,
"loss": 0.5773,
"step": 11265
},
{
"epoch": 0.71,
"grad_norm": 0.8876082897186279,
"learning_rate": 1.9991707558776686e-06,
"loss": 0.5718,
"step": 11266
},
{
"epoch": 0.71,
"grad_norm": 0.9632240533828735,
"learning_rate": 1.9983501517626976e-06,
"loss": 0.654,
"step": 11267
},
{
"epoch": 0.71,
"grad_norm": 0.8377987742424011,
"learning_rate": 1.997529674036761e-06,
"loss": 0.5421,
"step": 11268
},
{
"epoch": 0.71,
"grad_norm": 0.8990334272384644,
"learning_rate": 1.9967093227344013e-06,
"loss": 0.6147,
"step": 11269
},
{
"epoch": 0.71,
"grad_norm": 0.8912368416786194,
"learning_rate": 1.9958890978901685e-06,
"loss": 0.5817,
"step": 11270
},
{
"epoch": 0.71,
"grad_norm": 0.9113243818283081,
"learning_rate": 1.9950689995385936e-06,
"loss": 0.5697,
"step": 11271
},
{
"epoch": 0.71,
"grad_norm": 0.9373201727867126,
"learning_rate": 1.994249027714209e-06,
"loss": 0.6274,
"step": 11272
},
{
"epoch": 0.71,
"grad_norm": 0.8835095167160034,
"learning_rate": 1.9934291824515423e-06,
"loss": 0.57,
"step": 11273
},
{
"epoch": 0.71,
"grad_norm": 0.9121303558349609,
"learning_rate": 1.9926094637851135e-06,
"loss": 0.5945,
"step": 11274
},
{
"epoch": 0.71,
"grad_norm": 0.8844984769821167,
"learning_rate": 1.9917898717494377e-06,
"loss": 0.5921,
"step": 11275
},
{
"epoch": 0.71,
"grad_norm": 0.8488909006118774,
"learning_rate": 1.990970406379028e-06,
"loss": 0.5555,
"step": 11276
},
{
"epoch": 0.71,
"grad_norm": 0.9113077521324158,
"learning_rate": 1.990151067708383e-06,
"loss": 0.6299,
"step": 11277
},
{
"epoch": 0.71,
"grad_norm": 0.9047096967697144,
"learning_rate": 1.9893318557720093e-06,
"loss": 0.5794,
"step": 11278
},
{
"epoch": 0.71,
"grad_norm": 0.8998475670814514,
"learning_rate": 1.9885127706043966e-06,
"loss": 0.5783,
"step": 11279
},
{
"epoch": 0.71,
"grad_norm": 0.9234053492546082,
"learning_rate": 1.9876938122400348e-06,
"loss": 0.5779,
"step": 11280
},
{
"epoch": 0.71,
"grad_norm": 0.9886752367019653,
"learning_rate": 1.9868749807134087e-06,
"loss": 0.5843,
"step": 11281
},
{
"epoch": 0.71,
"grad_norm": 0.870291531085968,
"learning_rate": 1.9860562760589926e-06,
"loss": 0.5362,
"step": 11282
},
{
"epoch": 0.71,
"grad_norm": 0.9425791501998901,
"learning_rate": 1.9852376983112632e-06,
"loss": 0.6392,
"step": 11283
},
{
"epoch": 0.71,
"grad_norm": 0.8813480734825134,
"learning_rate": 1.9844192475046885e-06,
"loss": 0.5949,
"step": 11284
},
{
"epoch": 0.71,
"grad_norm": 0.8936353921890259,
"learning_rate": 1.983600923673727e-06,
"loss": 0.5256,
"step": 11285
},
{
"epoch": 0.72,
"grad_norm": 0.87742018699646,
"learning_rate": 1.9827827268528378e-06,
"loss": 0.544,
"step": 11286
},
{
"epoch": 0.72,
"grad_norm": 0.9305248260498047,
"learning_rate": 1.9819646570764712e-06,
"loss": 0.5996,
"step": 11287
},
{
"epoch": 0.72,
"grad_norm": 0.9481449723243713,
"learning_rate": 1.981146714379074e-06,
"loss": 0.617,
"step": 11288
},
{
"epoch": 0.72,
"grad_norm": 0.8779386281967163,
"learning_rate": 1.980328898795089e-06,
"loss": 0.5538,
"step": 11289
},
{
"epoch": 0.72,
"grad_norm": 0.9480637311935425,
"learning_rate": 1.979511210358946e-06,
"loss": 0.571,
"step": 11290
},
{
"epoch": 0.72,
"grad_norm": 0.8718064427375793,
"learning_rate": 1.9786936491050803e-06,
"loss": 0.5632,
"step": 11291
},
{
"epoch": 0.72,
"grad_norm": 0.8864429593086243,
"learning_rate": 1.9778762150679155e-06,
"loss": 0.5402,
"step": 11292
},
{
"epoch": 0.72,
"grad_norm": 0.861854076385498,
"learning_rate": 1.9770589082818694e-06,
"loss": 0.5858,
"step": 11293
},
{
"epoch": 0.72,
"grad_norm": 0.8690637350082397,
"learning_rate": 1.9762417287813557e-06,
"loss": 0.5755,
"step": 11294
},
{
"epoch": 0.72,
"grad_norm": 0.9482481479644775,
"learning_rate": 1.9754246766007847e-06,
"loss": 0.5812,
"step": 11295
},
{
"epoch": 0.72,
"grad_norm": 0.8821942806243896,
"learning_rate": 1.9746077517745582e-06,
"loss": 0.5189,
"step": 11296
},
{
"epoch": 0.72,
"grad_norm": 0.8461850881576538,
"learning_rate": 1.9737909543370764e-06,
"loss": 0.5615,
"step": 11297
},
{
"epoch": 0.72,
"grad_norm": 0.8924559950828552,
"learning_rate": 1.972974284322729e-06,
"loss": 0.5045,
"step": 11298
},
{
"epoch": 0.72,
"grad_norm": 0.8439890742301941,
"learning_rate": 1.9721577417659023e-06,
"loss": 0.5655,
"step": 11299
},
{
"epoch": 0.72,
"grad_norm": 0.8429501056671143,
"learning_rate": 1.9713413267009827e-06,
"loss": 0.5588,
"step": 11300
},
{
"epoch": 0.72,
"grad_norm": 0.8670548796653748,
"learning_rate": 1.970525039162343e-06,
"loss": 0.6076,
"step": 11301
},
{
"epoch": 0.72,
"grad_norm": 0.8741304874420166,
"learning_rate": 1.969708879184355e-06,
"loss": 0.6139,
"step": 11302
},
{
"epoch": 0.72,
"grad_norm": 0.8507691025733948,
"learning_rate": 1.9688928468013846e-06,
"loss": 0.5363,
"step": 11303
},
{
"epoch": 0.72,
"grad_norm": 0.8928526043891907,
"learning_rate": 1.968076942047791e-06,
"loss": 0.5258,
"step": 11304
},
{
"epoch": 0.72,
"grad_norm": 0.9080408215522766,
"learning_rate": 1.9672611649579332e-06,
"loss": 0.5913,
"step": 11305
},
{
"epoch": 0.72,
"grad_norm": 0.8880747556686401,
"learning_rate": 1.966445515566155e-06,
"loss": 0.5521,
"step": 11306
},
{
"epoch": 0.72,
"grad_norm": 0.9175702929496765,
"learning_rate": 1.965629993906802e-06,
"loss": 0.5995,
"step": 11307
},
{
"epoch": 0.72,
"grad_norm": 0.8689432144165039,
"learning_rate": 1.9648146000142173e-06,
"loss": 0.5661,
"step": 11308
},
{
"epoch": 0.72,
"grad_norm": 0.868963897228241,
"learning_rate": 1.963999333922729e-06,
"loss": 0.52,
"step": 11309
},
{
"epoch": 0.72,
"grad_norm": 0.8676355481147766,
"learning_rate": 1.963184195666668e-06,
"loss": 0.576,
"step": 11310
},
{
"epoch": 0.72,
"grad_norm": 0.8827881217002869,
"learning_rate": 1.9623691852803577e-06,
"loss": 0.5648,
"step": 11311
},
{
"epoch": 0.72,
"grad_norm": 0.8609069585800171,
"learning_rate": 1.9615543027981105e-06,
"loss": 0.5785,
"step": 11312
},
{
"epoch": 0.72,
"grad_norm": 0.8777223229408264,
"learning_rate": 1.9607395482542446e-06,
"loss": 0.5395,
"step": 11313
},
{
"epoch": 0.72,
"grad_norm": 0.9304616451263428,
"learning_rate": 1.9599249216830624e-06,
"loss": 0.6124,
"step": 11314
},
{
"epoch": 0.72,
"grad_norm": 0.945838987827301,
"learning_rate": 1.9591104231188656e-06,
"loss": 0.5889,
"step": 11315
},
{
"epoch": 0.72,
"grad_norm": 0.8509537577629089,
"learning_rate": 1.958296052595951e-06,
"loss": 0.558,
"step": 11316
},
{
"epoch": 0.72,
"grad_norm": 0.9135622978210449,
"learning_rate": 1.9574818101486075e-06,
"loss": 0.5564,
"step": 11317
},
{
"epoch": 0.72,
"grad_norm": 0.9122533202171326,
"learning_rate": 1.9566676958111214e-06,
"loss": 0.5926,
"step": 11318
},
{
"epoch": 0.72,
"grad_norm": 0.9296271800994873,
"learning_rate": 1.955853709617773e-06,
"loss": 0.597,
"step": 11319
},
{
"epoch": 0.72,
"grad_norm": 0.8591296672821045,
"learning_rate": 1.955039851602832e-06,
"loss": 0.6263,
"step": 11320
},
{
"epoch": 0.72,
"grad_norm": 0.9196903705596924,
"learning_rate": 1.9542261218005737e-06,
"loss": 0.5828,
"step": 11321
},
{
"epoch": 0.72,
"grad_norm": 0.9328646659851074,
"learning_rate": 1.9534125202452557e-06,
"loss": 0.5962,
"step": 11322
},
{
"epoch": 0.72,
"grad_norm": 0.9008827209472656,
"learning_rate": 1.952599046971139e-06,
"loss": 0.6416,
"step": 11323
},
{
"epoch": 0.72,
"grad_norm": 0.9060094952583313,
"learning_rate": 1.951785702012475e-06,
"loss": 0.5831,
"step": 11324
},
{
"epoch": 0.72,
"grad_norm": 0.8941810727119446,
"learning_rate": 1.9509724854035105e-06,
"loss": 0.5775,
"step": 11325
},
{
"epoch": 0.72,
"grad_norm": 0.9267244935035706,
"learning_rate": 1.950159397178488e-06,
"loss": 0.6576,
"step": 11326
},
{
"epoch": 0.72,
"grad_norm": 0.908926784992218,
"learning_rate": 1.9493464373716458e-06,
"loss": 0.5861,
"step": 11327
},
{
"epoch": 0.72,
"grad_norm": 0.8545692563056946,
"learning_rate": 1.9485336060172106e-06,
"loss": 0.5916,
"step": 11328
},
{
"epoch": 0.72,
"grad_norm": 0.9397866725921631,
"learning_rate": 1.9477209031494104e-06,
"loss": 0.5883,
"step": 11329
},
{
"epoch": 0.72,
"grad_norm": 0.8065406680107117,
"learning_rate": 1.9469083288024647e-06,
"loss": 0.5156,
"step": 11330
},
{
"epoch": 0.72,
"grad_norm": 0.8719428181648254,
"learning_rate": 1.9460958830105882e-06,
"loss": 0.61,
"step": 11331
},
{
"epoch": 0.72,
"grad_norm": 0.9234678149223328,
"learning_rate": 1.9452835658079905e-06,
"loss": 0.5471,
"step": 11332
},
{
"epoch": 0.72,
"grad_norm": 0.941314160823822,
"learning_rate": 1.9444713772288747e-06,
"loss": 0.6378,
"step": 11333
},
{
"epoch": 0.72,
"grad_norm": 0.9694302082061768,
"learning_rate": 1.94365931730744e-06,
"loss": 0.6152,
"step": 11334
},
{
"epoch": 0.72,
"grad_norm": 0.9199761152267456,
"learning_rate": 1.9428473860778817e-06,
"loss": 0.5797,
"step": 11335
},
{
"epoch": 0.72,
"grad_norm": 0.8677429556846619,
"learning_rate": 1.9420355835743826e-06,
"loss": 0.6189,
"step": 11336
},
{
"epoch": 0.72,
"grad_norm": 0.866114616394043,
"learning_rate": 1.941223909831125e-06,
"loss": 0.5872,
"step": 11337
},
{
"epoch": 0.72,
"grad_norm": 1.0049126148223877,
"learning_rate": 1.9404123648822924e-06,
"loss": 0.6329,
"step": 11338
},
{
"epoch": 0.72,
"grad_norm": 0.9035833477973938,
"learning_rate": 1.9396009487620494e-06,
"loss": 0.5539,
"step": 11339
},
{
"epoch": 0.72,
"grad_norm": 0.9090478420257568,
"learning_rate": 1.9387896615045636e-06,
"loss": 0.5676,
"step": 11340
},
{
"epoch": 0.72,
"grad_norm": 0.9102824330329895,
"learning_rate": 1.9379785031439985e-06,
"loss": 0.5586,
"step": 11341
},
{
"epoch": 0.72,
"grad_norm": 0.8439232707023621,
"learning_rate": 1.9371674737145023e-06,
"loss": 0.5545,
"step": 11342
},
{
"epoch": 0.72,
"grad_norm": 0.8065714836120605,
"learning_rate": 1.936356573250233e-06,
"loss": 0.5784,
"step": 11343
},
{
"epoch": 0.72,
"grad_norm": 0.8955844044685364,
"learning_rate": 1.935545801785329e-06,
"loss": 0.5508,
"step": 11344
},
{
"epoch": 0.72,
"grad_norm": 0.8924664258956909,
"learning_rate": 1.934735159353931e-06,
"loss": 0.5664,
"step": 11345
},
{
"epoch": 0.72,
"grad_norm": 0.8322812914848328,
"learning_rate": 1.9339246459901715e-06,
"loss": 0.5858,
"step": 11346
},
{
"epoch": 0.72,
"grad_norm": 0.8678402304649353,
"learning_rate": 1.93311426172818e-06,
"loss": 0.5856,
"step": 11347
},
{
"epoch": 0.72,
"grad_norm": 0.8565698266029358,
"learning_rate": 1.9323040066020774e-06,
"loss": 0.6008,
"step": 11348
},
{
"epoch": 0.72,
"grad_norm": 0.8522049188613892,
"learning_rate": 1.931493880645983e-06,
"loss": 0.5971,
"step": 11349
},
{
"epoch": 0.72,
"grad_norm": 0.8686321973800659,
"learning_rate": 1.9306838838940035e-06,
"loss": 0.552,
"step": 11350
},
{
"epoch": 0.72,
"grad_norm": 0.9111335873603821,
"learning_rate": 1.9298740163802523e-06,
"loss": 0.597,
"step": 11351
},
{
"epoch": 0.72,
"grad_norm": 0.8459984064102173,
"learning_rate": 1.929064278138823e-06,
"loss": 0.5981,
"step": 11352
},
{
"epoch": 0.72,
"grad_norm": 0.9223425388336182,
"learning_rate": 1.928254669203815e-06,
"loss": 0.6072,
"step": 11353
},
{
"epoch": 0.72,
"grad_norm": 0.9331545233726501,
"learning_rate": 1.9274451896093164e-06,
"loss": 0.6259,
"step": 11354
},
{
"epoch": 0.72,
"grad_norm": 0.8875550627708435,
"learning_rate": 1.926635839389413e-06,
"loss": 0.5238,
"step": 11355
},
{
"epoch": 0.72,
"grad_norm": 0.8523957133293152,
"learning_rate": 1.925826618578182e-06,
"loss": 0.5549,
"step": 11356
},
{
"epoch": 0.72,
"grad_norm": 0.8597106337547302,
"learning_rate": 1.9250175272097003e-06,
"loss": 0.5347,
"step": 11357
},
{
"epoch": 0.72,
"grad_norm": 0.865592360496521,
"learning_rate": 1.9242085653180314e-06,
"loss": 0.5577,
"step": 11358
},
{
"epoch": 0.72,
"grad_norm": 0.9318245649337769,
"learning_rate": 1.9233997329372402e-06,
"loss": 0.6011,
"step": 11359
},
{
"epoch": 0.72,
"grad_norm": 0.9316973090171814,
"learning_rate": 1.9225910301013834e-06,
"loss": 0.5626,
"step": 11360
},
{
"epoch": 0.72,
"grad_norm": 0.9204529523849487,
"learning_rate": 1.9217824568445125e-06,
"loss": 0.6112,
"step": 11361
},
{
"epoch": 0.72,
"grad_norm": 0.8521873950958252,
"learning_rate": 1.920974013200676e-06,
"loss": 0.5743,
"step": 11362
},
{
"epoch": 0.72,
"grad_norm": 0.8950269222259521,
"learning_rate": 1.9201656992039092e-06,
"loss": 0.5561,
"step": 11363
},
{
"epoch": 0.72,
"grad_norm": 0.9938862323760986,
"learning_rate": 1.9193575148882526e-06,
"loss": 0.6297,
"step": 11364
},
{
"epoch": 0.72,
"grad_norm": 0.9481446743011475,
"learning_rate": 1.918549460287736e-06,
"loss": 0.6234,
"step": 11365
},
{
"epoch": 0.72,
"grad_norm": 0.8465852737426758,
"learning_rate": 1.9177415354363802e-06,
"loss": 0.5314,
"step": 11366
},
{
"epoch": 0.72,
"grad_norm": 0.8947675824165344,
"learning_rate": 1.916933740368206e-06,
"loss": 0.5545,
"step": 11367
},
{
"epoch": 0.72,
"grad_norm": 0.8321127891540527,
"learning_rate": 1.916126075117227e-06,
"loss": 0.5376,
"step": 11368
},
{
"epoch": 0.72,
"grad_norm": 0.8513221740722656,
"learning_rate": 1.9153185397174506e-06,
"loss": 0.5966,
"step": 11369
},
{
"epoch": 0.72,
"grad_norm": 0.8549067974090576,
"learning_rate": 1.9145111342028817e-06,
"loss": 0.5757,
"step": 11370
},
{
"epoch": 0.72,
"grad_norm": 0.9566909074783325,
"learning_rate": 1.9137038586075117e-06,
"loss": 0.5956,
"step": 11371
},
{
"epoch": 0.72,
"grad_norm": 0.8595585823059082,
"learning_rate": 1.9128967129653375e-06,
"loss": 0.5711,
"step": 11372
},
{
"epoch": 0.72,
"grad_norm": 0.8773391842842102,
"learning_rate": 1.9120896973103453e-06,
"loss": 0.6042,
"step": 11373
},
{
"epoch": 0.72,
"grad_norm": 0.970403790473938,
"learning_rate": 1.911282811676512e-06,
"loss": 0.6186,
"step": 11374
},
{
"epoch": 0.72,
"grad_norm": 0.838644802570343,
"learning_rate": 1.9104760560978147e-06,
"loss": 0.5634,
"step": 11375
},
{
"epoch": 0.72,
"grad_norm": 0.8876333236694336,
"learning_rate": 1.909669430608223e-06,
"loss": 0.543,
"step": 11376
},
{
"epoch": 0.72,
"grad_norm": 0.9171946048736572,
"learning_rate": 1.908862935241701e-06,
"loss": 0.5908,
"step": 11377
},
{
"epoch": 0.72,
"grad_norm": 0.8265011310577393,
"learning_rate": 1.9080565700322095e-06,
"loss": 0.5305,
"step": 11378
},
{
"epoch": 0.72,
"grad_norm": 0.9624162912368774,
"learning_rate": 1.9072503350136979e-06,
"loss": 0.5419,
"step": 11379
},
{
"epoch": 0.72,
"grad_norm": 0.8564184308052063,
"learning_rate": 1.9064442302201136e-06,
"loss": 0.5412,
"step": 11380
},
{
"epoch": 0.72,
"grad_norm": 0.9162154793739319,
"learning_rate": 1.9056382556854053e-06,
"loss": 0.598,
"step": 11381
},
{
"epoch": 0.72,
"grad_norm": 0.8687850832939148,
"learning_rate": 1.9048324114435036e-06,
"loss": 0.5839,
"step": 11382
},
{
"epoch": 0.72,
"grad_norm": 0.8435238003730774,
"learning_rate": 1.9040266975283417e-06,
"loss": 0.5566,
"step": 11383
},
{
"epoch": 0.72,
"grad_norm": 0.8652970790863037,
"learning_rate": 1.9032211139738455e-06,
"loss": 0.5781,
"step": 11384
},
{
"epoch": 0.72,
"grad_norm": 0.863194465637207,
"learning_rate": 1.902415660813935e-06,
"loss": 0.6132,
"step": 11385
},
{
"epoch": 0.72,
"grad_norm": 0.9415022134780884,
"learning_rate": 1.9016103380825274e-06,
"loss": 0.5613,
"step": 11386
},
{
"epoch": 0.72,
"grad_norm": 0.9320086240768433,
"learning_rate": 1.900805145813528e-06,
"loss": 0.5869,
"step": 11387
},
{
"epoch": 0.72,
"grad_norm": 0.8881116509437561,
"learning_rate": 1.9000000840408421e-06,
"loss": 0.5784,
"step": 11388
},
{
"epoch": 0.72,
"grad_norm": 0.8864371180534363,
"learning_rate": 1.8991951527983694e-06,
"loss": 0.5878,
"step": 11389
},
{
"epoch": 0.72,
"grad_norm": 0.922127902507782,
"learning_rate": 1.8983903521200015e-06,
"loss": 0.5998,
"step": 11390
},
{
"epoch": 0.72,
"grad_norm": 0.8979513049125671,
"learning_rate": 1.8975856820396265e-06,
"loss": 0.5647,
"step": 11391
},
{
"epoch": 0.72,
"grad_norm": 0.8566264510154724,
"learning_rate": 1.8967811425911275e-06,
"loss": 0.6127,
"step": 11392
},
{
"epoch": 0.72,
"grad_norm": 0.9450397491455078,
"learning_rate": 1.8959767338083758e-06,
"loss": 0.5713,
"step": 11393
},
{
"epoch": 0.72,
"grad_norm": 0.8804638385772705,
"learning_rate": 1.8951724557252472e-06,
"loss": 0.5315,
"step": 11394
},
{
"epoch": 0.72,
"grad_norm": 0.8379377722740173,
"learning_rate": 1.8943683083756075e-06,
"loss": 0.5763,
"step": 11395
},
{
"epoch": 0.72,
"grad_norm": 0.9465924501419067,
"learning_rate": 1.8935642917933128e-06,
"loss": 0.6547,
"step": 11396
},
{
"epoch": 0.72,
"grad_norm": 0.9012244343757629,
"learning_rate": 1.8927604060122196e-06,
"loss": 0.5796,
"step": 11397
},
{
"epoch": 0.72,
"grad_norm": 0.9090456962585449,
"learning_rate": 1.8919566510661758e-06,
"loss": 0.5763,
"step": 11398
},
{
"epoch": 0.72,
"grad_norm": 0.8996036648750305,
"learning_rate": 1.891153026989026e-06,
"loss": 0.5953,
"step": 11399
},
{
"epoch": 0.72,
"grad_norm": 0.8748338222503662,
"learning_rate": 1.8903495338146089e-06,
"loss": 0.5869,
"step": 11400
},
{
"epoch": 0.72,
"grad_norm": 0.8042425513267517,
"learning_rate": 1.8895461715767517e-06,
"loss": 0.5118,
"step": 11401
},
{
"epoch": 0.72,
"grad_norm": 0.8664458990097046,
"learning_rate": 1.888742940309286e-06,
"loss": 0.5208,
"step": 11402
},
{
"epoch": 0.72,
"grad_norm": 0.9896268248558044,
"learning_rate": 1.8879398400460342e-06,
"loss": 0.6079,
"step": 11403
},
{
"epoch": 0.72,
"grad_norm": 0.849636435508728,
"learning_rate": 1.8871368708208076e-06,
"loss": 0.5507,
"step": 11404
},
{
"epoch": 0.72,
"grad_norm": 0.8900498151779175,
"learning_rate": 1.8863340326674184e-06,
"loss": 0.6023,
"step": 11405
},
{
"epoch": 0.72,
"grad_norm": 0.8602756261825562,
"learning_rate": 1.8855313256196722e-06,
"loss": 0.6087,
"step": 11406
},
{
"epoch": 0.72,
"grad_norm": 0.8602705001831055,
"learning_rate": 1.8847287497113664e-06,
"loss": 0.5247,
"step": 11407
},
{
"epoch": 0.72,
"grad_norm": 0.879084587097168,
"learning_rate": 1.883926304976298e-06,
"loss": 0.544,
"step": 11408
},
{
"epoch": 0.72,
"grad_norm": 0.8612745404243469,
"learning_rate": 1.8831239914482512e-06,
"loss": 0.5575,
"step": 11409
},
{
"epoch": 0.72,
"grad_norm": 0.8964210152626038,
"learning_rate": 1.8823218091610085e-06,
"loss": 0.6014,
"step": 11410
},
{
"epoch": 0.72,
"grad_norm": 0.891295313835144,
"learning_rate": 1.8815197581483523e-06,
"loss": 0.5591,
"step": 11411
},
{
"epoch": 0.72,
"grad_norm": 0.9350022673606873,
"learning_rate": 1.880717838444049e-06,
"loss": 0.5922,
"step": 11412
},
{
"epoch": 0.72,
"grad_norm": 0.8329875469207764,
"learning_rate": 1.879916050081866e-06,
"loss": 0.5386,
"step": 11413
},
{
"epoch": 0.72,
"grad_norm": 0.8741490244865417,
"learning_rate": 1.8791143930955641e-06,
"loss": 0.5588,
"step": 11414
},
{
"epoch": 0.72,
"grad_norm": 0.8966600894927979,
"learning_rate": 1.8783128675188988e-06,
"loss": 0.6352,
"step": 11415
},
{
"epoch": 0.72,
"grad_norm": 0.8930423855781555,
"learning_rate": 1.8775114733856203e-06,
"loss": 0.5551,
"step": 11416
},
{
"epoch": 0.72,
"grad_norm": 0.835688591003418,
"learning_rate": 1.87671021072947e-06,
"loss": 0.5729,
"step": 11417
},
{
"epoch": 0.72,
"grad_norm": 0.9322239756584167,
"learning_rate": 1.8759090795841856e-06,
"loss": 0.5968,
"step": 11418
},
{
"epoch": 0.72,
"grad_norm": 0.9086197018623352,
"learning_rate": 1.8751080799835059e-06,
"loss": 0.5331,
"step": 11419
},
{
"epoch": 0.72,
"grad_norm": 0.8828703165054321,
"learning_rate": 1.8743072119611522e-06,
"loss": 0.5389,
"step": 11420
},
{
"epoch": 0.72,
"grad_norm": 0.9224802255630493,
"learning_rate": 1.873506475550848e-06,
"loss": 0.6192,
"step": 11421
},
{
"epoch": 0.72,
"grad_norm": 0.8025329113006592,
"learning_rate": 1.8727058707863121e-06,
"loss": 0.5028,
"step": 11422
},
{
"epoch": 0.72,
"grad_norm": 0.8815546631813049,
"learning_rate": 1.871905397701249e-06,
"loss": 0.6344,
"step": 11423
},
{
"epoch": 0.72,
"grad_norm": 0.9033147692680359,
"learning_rate": 1.8711050563293714e-06,
"loss": 0.5781,
"step": 11424
},
{
"epoch": 0.72,
"grad_norm": 0.8625471591949463,
"learning_rate": 1.8703048467043732e-06,
"loss": 0.5251,
"step": 11425
},
{
"epoch": 0.72,
"grad_norm": 0.9039772152900696,
"learning_rate": 1.869504768859951e-06,
"loss": 0.5467,
"step": 11426
},
{
"epoch": 0.72,
"grad_norm": 0.9913069605827332,
"learning_rate": 1.8687048228297928e-06,
"loss": 0.6187,
"step": 11427
},
{
"epoch": 0.72,
"grad_norm": 0.9143205881118774,
"learning_rate": 1.8679050086475814e-06,
"loss": 0.5619,
"step": 11428
},
{
"epoch": 0.72,
"grad_norm": 0.9044589400291443,
"learning_rate": 1.867105326346994e-06,
"loss": 0.6366,
"step": 11429
},
{
"epoch": 0.72,
"grad_norm": 0.8686836361885071,
"learning_rate": 1.8663057759617048e-06,
"loss": 0.5955,
"step": 11430
},
{
"epoch": 0.72,
"grad_norm": 0.9669235348701477,
"learning_rate": 1.8655063575253746e-06,
"loss": 0.5954,
"step": 11431
},
{
"epoch": 0.72,
"grad_norm": 0.9471785426139832,
"learning_rate": 1.8647070710716709e-06,
"loss": 0.6246,
"step": 11432
},
{
"epoch": 0.72,
"grad_norm": 0.8249446153640747,
"learning_rate": 1.8639079166342438e-06,
"loss": 0.5258,
"step": 11433
},
{
"epoch": 0.72,
"grad_norm": 0.8744306564331055,
"learning_rate": 1.8631088942467452e-06,
"loss": 0.5818,
"step": 11434
},
{
"epoch": 0.72,
"grad_norm": 0.8946027159690857,
"learning_rate": 1.8623100039428194e-06,
"loss": 0.5504,
"step": 11435
},
{
"epoch": 0.72,
"grad_norm": 0.8908700346946716,
"learning_rate": 1.8615112457561013e-06,
"loss": 0.582,
"step": 11436
},
{
"epoch": 0.72,
"grad_norm": 0.86359703540802,
"learning_rate": 1.860712619720228e-06,
"loss": 0.6233,
"step": 11437
},
{
"epoch": 0.72,
"grad_norm": 0.92805016040802,
"learning_rate": 1.8599141258688274e-06,
"loss": 0.5893,
"step": 11438
},
{
"epoch": 0.72,
"grad_norm": 0.8947566151618958,
"learning_rate": 1.8591157642355179e-06,
"loss": 0.5218,
"step": 11439
},
{
"epoch": 0.72,
"grad_norm": 0.9130182266235352,
"learning_rate": 1.8583175348539173e-06,
"loss": 0.5615,
"step": 11440
},
{
"epoch": 0.72,
"grad_norm": 0.8366416096687317,
"learning_rate": 1.8575194377576355e-06,
"loss": 0.5131,
"step": 11441
},
{
"epoch": 0.72,
"grad_norm": 0.8980015516281128,
"learning_rate": 1.856721472980279e-06,
"loss": 0.5609,
"step": 11442
},
{
"epoch": 0.72,
"grad_norm": 0.8970168232917786,
"learning_rate": 1.855923640555448e-06,
"loss": 0.5611,
"step": 11443
},
{
"epoch": 0.73,
"grad_norm": 0.8903645873069763,
"learning_rate": 1.8551259405167315e-06,
"loss": 0.5523,
"step": 11444
},
{
"epoch": 0.73,
"grad_norm": 0.8101871609687805,
"learning_rate": 1.8543283728977234e-06,
"loss": 0.5412,
"step": 11445
},
{
"epoch": 0.73,
"grad_norm": 0.8400049209594727,
"learning_rate": 1.8535309377320059e-06,
"loss": 0.5745,
"step": 11446
},
{
"epoch": 0.73,
"grad_norm": 0.8555065393447876,
"learning_rate": 1.8527336350531532e-06,
"loss": 0.5557,
"step": 11447
},
{
"epoch": 0.73,
"grad_norm": 0.858265221118927,
"learning_rate": 1.851936464894739e-06,
"loss": 0.5744,
"step": 11448
},
{
"epoch": 0.73,
"grad_norm": 0.925983190536499,
"learning_rate": 1.8511394272903287e-06,
"loss": 0.6146,
"step": 11449
},
{
"epoch": 0.73,
"grad_norm": 0.9145652651786804,
"learning_rate": 1.8503425222734834e-06,
"loss": 0.5427,
"step": 11450
},
{
"epoch": 0.73,
"grad_norm": 0.8962170481681824,
"learning_rate": 1.8495457498777585e-06,
"loss": 0.6176,
"step": 11451
},
{
"epoch": 0.73,
"grad_norm": 0.8576472997665405,
"learning_rate": 1.8487491101367016e-06,
"loss": 0.5613,
"step": 11452
},
{
"epoch": 0.73,
"grad_norm": 0.9138413667678833,
"learning_rate": 1.8479526030838552e-06,
"loss": 0.6351,
"step": 11453
},
{
"epoch": 0.73,
"grad_norm": 0.8952200412750244,
"learning_rate": 1.8471562287527627e-06,
"loss": 0.5977,
"step": 11454
},
{
"epoch": 0.73,
"grad_norm": 0.9184353351593018,
"learning_rate": 1.8463599871769516e-06,
"loss": 0.6327,
"step": 11455
},
{
"epoch": 0.73,
"grad_norm": 0.9197295904159546,
"learning_rate": 1.8455638783899515e-06,
"loss": 0.6019,
"step": 11456
},
{
"epoch": 0.73,
"grad_norm": 0.8394375443458557,
"learning_rate": 1.8447679024252825e-06,
"loss": 0.5873,
"step": 11457
},
{
"epoch": 0.73,
"grad_norm": 0.8649691939353943,
"learning_rate": 1.8439720593164606e-06,
"loss": 0.5927,
"step": 11458
},
{
"epoch": 0.73,
"grad_norm": 0.8406863808631897,
"learning_rate": 1.8431763490969968e-06,
"loss": 0.5791,
"step": 11459
},
{
"epoch": 0.73,
"grad_norm": 0.8677908182144165,
"learning_rate": 1.8423807718003967e-06,
"loss": 0.5523,
"step": 11460
},
{
"epoch": 0.73,
"grad_norm": 0.8684352040290833,
"learning_rate": 1.8415853274601541e-06,
"loss": 0.5395,
"step": 11461
},
{
"epoch": 0.73,
"grad_norm": 0.9893013834953308,
"learning_rate": 1.8407900161097698e-06,
"loss": 0.5907,
"step": 11462
},
{
"epoch": 0.73,
"grad_norm": 0.9031257033348083,
"learning_rate": 1.839994837782726e-06,
"loss": 0.576,
"step": 11463
},
{
"epoch": 0.73,
"grad_norm": 0.8636795282363892,
"learning_rate": 1.8391997925125066e-06,
"loss": 0.6069,
"step": 11464
},
{
"epoch": 0.73,
"grad_norm": 0.8956241607666016,
"learning_rate": 1.8384048803325887e-06,
"loss": 0.5795,
"step": 11465
},
{
"epoch": 0.73,
"grad_norm": 0.857373058795929,
"learning_rate": 1.8376101012764424e-06,
"loss": 0.5387,
"step": 11466
},
{
"epoch": 0.73,
"grad_norm": 0.875593364238739,
"learning_rate": 1.8368154553775342e-06,
"loss": 0.5665,
"step": 11467
},
{
"epoch": 0.73,
"grad_norm": 0.90203857421875,
"learning_rate": 1.8360209426693242e-06,
"loss": 0.5414,
"step": 11468
},
{
"epoch": 0.73,
"grad_norm": 0.8598183393478394,
"learning_rate": 1.8352265631852645e-06,
"loss": 0.5466,
"step": 11469
},
{
"epoch": 0.73,
"grad_norm": 0.9762682914733887,
"learning_rate": 1.8344323169588045e-06,
"loss": 0.5858,
"step": 11470
},
{
"epoch": 0.73,
"grad_norm": 0.8744803071022034,
"learning_rate": 1.8336382040233874e-06,
"loss": 0.5745,
"step": 11471
},
{
"epoch": 0.73,
"grad_norm": 0.8582815527915955,
"learning_rate": 1.8328442244124506e-06,
"loss": 0.5103,
"step": 11472
},
{
"epoch": 0.73,
"grad_norm": 0.8855782747268677,
"learning_rate": 1.8320503781594273e-06,
"loss": 0.624,
"step": 11473
},
{
"epoch": 0.73,
"grad_norm": 0.8730263710021973,
"learning_rate": 1.8312566652977393e-06,
"loss": 0.5789,
"step": 11474
},
{
"epoch": 0.73,
"grad_norm": 0.926342248916626,
"learning_rate": 1.8304630858608107e-06,
"loss": 0.6076,
"step": 11475
},
{
"epoch": 0.73,
"grad_norm": 0.872226357460022,
"learning_rate": 1.8296696398820579e-06,
"loss": 0.5964,
"step": 11476
},
{
"epoch": 0.73,
"grad_norm": 0.9411280751228333,
"learning_rate": 1.828876327394886e-06,
"loss": 0.579,
"step": 11477
},
{
"epoch": 0.73,
"grad_norm": 0.8571912050247192,
"learning_rate": 1.8280831484327006e-06,
"loss": 0.5736,
"step": 11478
},
{
"epoch": 0.73,
"grad_norm": 0.8927587866783142,
"learning_rate": 1.8272901030288991e-06,
"loss": 0.6098,
"step": 11479
},
{
"epoch": 0.73,
"grad_norm": 0.845928966999054,
"learning_rate": 1.8264971912168744e-06,
"loss": 0.5546,
"step": 11480
},
{
"epoch": 0.73,
"grad_norm": 0.8704530000686646,
"learning_rate": 1.825704413030015e-06,
"loss": 0.5941,
"step": 11481
},
{
"epoch": 0.73,
"grad_norm": 0.9911707639694214,
"learning_rate": 1.8249117685016983e-06,
"loss": 0.5893,
"step": 11482
},
{
"epoch": 0.73,
"grad_norm": 0.9240842461585999,
"learning_rate": 1.8241192576653e-06,
"loss": 0.6098,
"step": 11483
},
{
"epoch": 0.73,
"grad_norm": 1.0558674335479736,
"learning_rate": 1.8233268805541953e-06,
"loss": 0.5122,
"step": 11484
},
{
"epoch": 0.73,
"grad_norm": 0.8903986215591431,
"learning_rate": 1.8225346372017432e-06,
"loss": 0.5622,
"step": 11485
},
{
"epoch": 0.73,
"grad_norm": 0.9144458174705505,
"learning_rate": 1.8217425276413037e-06,
"loss": 0.5719,
"step": 11486
},
{
"epoch": 0.73,
"grad_norm": 0.8681707978248596,
"learning_rate": 1.8209505519062299e-06,
"loss": 0.554,
"step": 11487
},
{
"epoch": 0.73,
"grad_norm": 0.8360025882720947,
"learning_rate": 1.8201587100298694e-06,
"loss": 0.5348,
"step": 11488
},
{
"epoch": 0.73,
"grad_norm": 0.8641213178634644,
"learning_rate": 1.8193670020455656e-06,
"loss": 0.5632,
"step": 11489
},
{
"epoch": 0.73,
"grad_norm": 0.9490131735801697,
"learning_rate": 1.8185754279866508e-06,
"loss": 0.5454,
"step": 11490
},
{
"epoch": 0.73,
"grad_norm": 0.9039450883865356,
"learning_rate": 1.8177839878864562e-06,
"loss": 0.5132,
"step": 11491
},
{
"epoch": 0.73,
"grad_norm": 0.8818166851997375,
"learning_rate": 1.8169926817783106e-06,
"loss": 0.5789,
"step": 11492
},
{
"epoch": 0.73,
"grad_norm": 0.9329283237457275,
"learning_rate": 1.8162015096955288e-06,
"loss": 0.593,
"step": 11493
},
{
"epoch": 0.73,
"grad_norm": 0.9215501546859741,
"learning_rate": 1.8154104716714254e-06,
"loss": 0.5819,
"step": 11494
},
{
"epoch": 0.73,
"grad_norm": 0.9087458252906799,
"learning_rate": 1.814619567739309e-06,
"loss": 0.563,
"step": 11495
},
{
"epoch": 0.73,
"grad_norm": 0.9145926237106323,
"learning_rate": 1.8138287979324815e-06,
"loss": 0.571,
"step": 11496
},
{
"epoch": 0.73,
"grad_norm": 0.8668627142906189,
"learning_rate": 1.8130381622842414e-06,
"loss": 0.4685,
"step": 11497
},
{
"epoch": 0.73,
"grad_norm": 0.8847333788871765,
"learning_rate": 1.8122476608278755e-06,
"loss": 0.5919,
"step": 11498
},
{
"epoch": 0.73,
"grad_norm": 0.857651948928833,
"learning_rate": 1.8114572935966713e-06,
"loss": 0.574,
"step": 11499
},
{
"epoch": 0.73,
"grad_norm": 0.8987635374069214,
"learning_rate": 1.8106670606239086e-06,
"loss": 0.5646,
"step": 11500
},
{
"epoch": 0.73,
"grad_norm": 0.8433480262756348,
"learning_rate": 1.8098769619428607e-06,
"loss": 0.5461,
"step": 11501
},
{
"epoch": 0.73,
"grad_norm": 0.8456798791885376,
"learning_rate": 1.8090869975867964e-06,
"loss": 0.5271,
"step": 11502
},
{
"epoch": 0.73,
"grad_norm": 0.9002053737640381,
"learning_rate": 1.8082971675889798e-06,
"loss": 0.6169,
"step": 11503
},
{
"epoch": 0.73,
"grad_norm": 0.8994352221488953,
"learning_rate": 1.8075074719826636e-06,
"loss": 0.5652,
"step": 11504
},
{
"epoch": 0.73,
"grad_norm": 0.8358734846115112,
"learning_rate": 1.8067179108011047e-06,
"loss": 0.5523,
"step": 11505
},
{
"epoch": 0.73,
"grad_norm": 0.8662353754043579,
"learning_rate": 1.8059284840775443e-06,
"loss": 0.5645,
"step": 11506
},
{
"epoch": 0.73,
"grad_norm": 0.8541300296783447,
"learning_rate": 1.8051391918452244e-06,
"loss": 0.5972,
"step": 11507
},
{
"epoch": 0.73,
"grad_norm": 0.9039734601974487,
"learning_rate": 1.8043500341373788e-06,
"loss": 0.5526,
"step": 11508
},
{
"epoch": 0.73,
"grad_norm": 0.8430119156837463,
"learning_rate": 1.8035610109872364e-06,
"loss": 0.588,
"step": 11509
},
{
"epoch": 0.73,
"grad_norm": 0.9176574349403381,
"learning_rate": 1.8027721224280204e-06,
"loss": 0.6203,
"step": 11510
},
{
"epoch": 0.73,
"grad_norm": 0.874259352684021,
"learning_rate": 1.8019833684929493e-06,
"loss": 0.5835,
"step": 11511
},
{
"epoch": 0.73,
"grad_norm": 0.8891341686248779,
"learning_rate": 1.8011947492152303e-06,
"loss": 0.59,
"step": 11512
},
{
"epoch": 0.73,
"grad_norm": 0.9052767753601074,
"learning_rate": 1.8004062646280762e-06,
"loss": 0.5455,
"step": 11513
},
{
"epoch": 0.73,
"grad_norm": 0.9022553563117981,
"learning_rate": 1.799617914764682e-06,
"loss": 0.5658,
"step": 11514
},
{
"epoch": 0.73,
"grad_norm": 0.9340382814407349,
"learning_rate": 1.7988296996582438e-06,
"loss": 0.6134,
"step": 11515
},
{
"epoch": 0.73,
"grad_norm": 0.9259970188140869,
"learning_rate": 1.7980416193419509e-06,
"loss": 0.561,
"step": 11516
},
{
"epoch": 0.73,
"grad_norm": 0.9039214849472046,
"learning_rate": 1.7972536738489865e-06,
"loss": 0.5975,
"step": 11517
},
{
"epoch": 0.73,
"grad_norm": 0.8830257654190063,
"learning_rate": 1.7964658632125286e-06,
"loss": 0.6171,
"step": 11518
},
{
"epoch": 0.73,
"grad_norm": 0.8530765771865845,
"learning_rate": 1.7956781874657508e-06,
"loss": 0.5773,
"step": 11519
},
{
"epoch": 0.73,
"grad_norm": 0.9169198274612427,
"learning_rate": 1.7948906466418154e-06,
"loss": 0.6076,
"step": 11520
},
{
"epoch": 0.73,
"grad_norm": 0.8221704959869385,
"learning_rate": 1.7941032407738857e-06,
"loss": 0.5213,
"step": 11521
},
{
"epoch": 0.73,
"grad_norm": 0.9299726486206055,
"learning_rate": 1.7933159698951153e-06,
"loss": 0.5747,
"step": 11522
},
{
"epoch": 0.73,
"grad_norm": 0.8379265666007996,
"learning_rate": 1.7925288340386543e-06,
"loss": 0.6024,
"step": 11523
},
{
"epoch": 0.73,
"grad_norm": 0.8032079339027405,
"learning_rate": 1.7917418332376463e-06,
"loss": 0.5009,
"step": 11524
},
{
"epoch": 0.73,
"grad_norm": 0.885210394859314,
"learning_rate": 1.7909549675252291e-06,
"loss": 0.5925,
"step": 11525
},
{
"epoch": 0.73,
"grad_norm": 0.8500308394432068,
"learning_rate": 1.7901682369345346e-06,
"loss": 0.5507,
"step": 11526
},
{
"epoch": 0.73,
"grad_norm": 0.8852202296257019,
"learning_rate": 1.7893816414986915e-06,
"loss": 0.5658,
"step": 11527
},
{
"epoch": 0.73,
"grad_norm": 0.9225091934204102,
"learning_rate": 1.7885951812508163e-06,
"loss": 0.5696,
"step": 11528
},
{
"epoch": 0.73,
"grad_norm": 0.9719336032867432,
"learning_rate": 1.787808856224027e-06,
"loss": 0.6464,
"step": 11529
},
{
"epoch": 0.73,
"grad_norm": 0.9021725654602051,
"learning_rate": 1.7870226664514318e-06,
"loss": 0.5704,
"step": 11530
},
{
"epoch": 0.73,
"grad_norm": 0.8923550844192505,
"learning_rate": 1.786236611966135e-06,
"loss": 0.605,
"step": 11531
},
{
"epoch": 0.73,
"grad_norm": 0.9258638620376587,
"learning_rate": 1.7854506928012349e-06,
"loss": 0.5321,
"step": 11532
},
{
"epoch": 0.73,
"grad_norm": 0.8562982082366943,
"learning_rate": 1.784664908989825e-06,
"loss": 0.5608,
"step": 11533
},
{
"epoch": 0.73,
"grad_norm": 0.9579175710678101,
"learning_rate": 1.7838792605649874e-06,
"loss": 0.6364,
"step": 11534
},
{
"epoch": 0.73,
"grad_norm": 0.8694881200790405,
"learning_rate": 1.7830937475598092e-06,
"loss": 0.563,
"step": 11535
},
{
"epoch": 0.73,
"grad_norm": 0.9621427655220032,
"learning_rate": 1.7823083700073607e-06,
"loss": 0.5745,
"step": 11536
},
{
"epoch": 0.73,
"grad_norm": 0.8175010085105896,
"learning_rate": 1.781523127940713e-06,
"loss": 0.5574,
"step": 11537
},
{
"epoch": 0.73,
"grad_norm": 0.9051305055618286,
"learning_rate": 1.7807380213929304e-06,
"loss": 0.5485,
"step": 11538
},
{
"epoch": 0.73,
"grad_norm": 0.9284529089927673,
"learning_rate": 1.7799530503970707e-06,
"loss": 0.5776,
"step": 11539
},
{
"epoch": 0.73,
"grad_norm": 0.9079828262329102,
"learning_rate": 1.7791682149861866e-06,
"loss": 0.5703,
"step": 11540
},
{
"epoch": 0.73,
"grad_norm": 0.8822880387306213,
"learning_rate": 1.778383515193326e-06,
"loss": 0.6083,
"step": 11541
},
{
"epoch": 0.73,
"grad_norm": 0.8300610184669495,
"learning_rate": 1.777598951051525e-06,
"loss": 0.5038,
"step": 11542
},
{
"epoch": 0.73,
"grad_norm": 0.8688510656356812,
"learning_rate": 1.7768145225938254e-06,
"loss": 0.5166,
"step": 11543
},
{
"epoch": 0.73,
"grad_norm": 0.8871831297874451,
"learning_rate": 1.7760302298532522e-06,
"loss": 0.5148,
"step": 11544
},
{
"epoch": 0.73,
"grad_norm": 0.8553435802459717,
"learning_rate": 1.7752460728628308e-06,
"loss": 0.5087,
"step": 11545
},
{
"epoch": 0.73,
"grad_norm": 0.8729947805404663,
"learning_rate": 1.7744620516555804e-06,
"loss": 0.5862,
"step": 11546
},
{
"epoch": 0.73,
"grad_norm": 0.9969896078109741,
"learning_rate": 1.7736781662645092e-06,
"loss": 0.6322,
"step": 11547
},
{
"epoch": 0.73,
"grad_norm": 0.9456208944320679,
"learning_rate": 1.7728944167226287e-06,
"loss": 0.5841,
"step": 11548
},
{
"epoch": 0.73,
"grad_norm": 0.9061382412910461,
"learning_rate": 1.772110803062939e-06,
"loss": 0.5942,
"step": 11549
},
{
"epoch": 0.73,
"grad_norm": 0.9012535810470581,
"learning_rate": 1.7713273253184331e-06,
"loss": 0.5462,
"step": 11550
},
{
"epoch": 0.73,
"grad_norm": 0.8942342400550842,
"learning_rate": 1.7705439835221022e-06,
"loss": 0.6231,
"step": 11551
},
{
"epoch": 0.73,
"grad_norm": 0.9196451902389526,
"learning_rate": 1.7697607777069291e-06,
"loss": 0.5851,
"step": 11552
},
{
"epoch": 0.73,
"grad_norm": 0.9161397814750671,
"learning_rate": 1.7689777079058929e-06,
"loss": 0.5397,
"step": 11553
},
{
"epoch": 0.73,
"grad_norm": 0.870907187461853,
"learning_rate": 1.7681947741519668e-06,
"loss": 0.5578,
"step": 11554
},
{
"epoch": 0.73,
"grad_norm": 0.9699699282646179,
"learning_rate": 1.7674119764781129e-06,
"loss": 0.639,
"step": 11555
},
{
"epoch": 0.73,
"grad_norm": 0.8969030380249023,
"learning_rate": 1.7666293149172969e-06,
"loss": 0.6149,
"step": 11556
},
{
"epoch": 0.73,
"grad_norm": 0.8886342644691467,
"learning_rate": 1.7658467895024744e-06,
"loss": 0.5669,
"step": 11557
},
{
"epoch": 0.73,
"grad_norm": 0.9235454797744751,
"learning_rate": 1.7650644002665906e-06,
"loss": 0.6232,
"step": 11558
},
{
"epoch": 0.73,
"grad_norm": 0.8997302055358887,
"learning_rate": 1.7642821472425918e-06,
"loss": 0.5862,
"step": 11559
},
{
"epoch": 0.73,
"grad_norm": 0.965051531791687,
"learning_rate": 1.7635000304634154e-06,
"loss": 0.569,
"step": 11560
},
{
"epoch": 0.73,
"grad_norm": 0.8321825861930847,
"learning_rate": 1.762718049961994e-06,
"loss": 0.5761,
"step": 11561
},
{
"epoch": 0.73,
"grad_norm": 0.8804091811180115,
"learning_rate": 1.7619362057712552e-06,
"loss": 0.5491,
"step": 11562
},
{
"epoch": 0.73,
"grad_norm": 0.8647125363349915,
"learning_rate": 1.761154497924117e-06,
"loss": 0.5474,
"step": 11563
},
{
"epoch": 0.73,
"grad_norm": 0.9047082662582397,
"learning_rate": 1.7603729264534936e-06,
"loss": 0.5333,
"step": 11564
},
{
"epoch": 0.73,
"grad_norm": 0.8758959174156189,
"learning_rate": 1.7595914913923001e-06,
"loss": 0.6063,
"step": 11565
},
{
"epoch": 0.73,
"grad_norm": 0.8838177919387817,
"learning_rate": 1.7588101927734346e-06,
"loss": 0.5993,
"step": 11566
},
{
"epoch": 0.73,
"grad_norm": 0.8192143440246582,
"learning_rate": 1.7580290306297965e-06,
"loss": 0.5965,
"step": 11567
},
{
"epoch": 0.73,
"grad_norm": 0.8735188245773315,
"learning_rate": 1.7572480049942781e-06,
"loss": 0.6107,
"step": 11568
},
{
"epoch": 0.73,
"grad_norm": 0.8884807229042053,
"learning_rate": 1.7564671158997653e-06,
"loss": 0.5726,
"step": 11569
},
{
"epoch": 0.73,
"grad_norm": 0.9197561144828796,
"learning_rate": 1.755686363379141e-06,
"loss": 0.5968,
"step": 11570
},
{
"epoch": 0.73,
"grad_norm": 0.9510713815689087,
"learning_rate": 1.7549057474652753e-06,
"loss": 0.6455,
"step": 11571
},
{
"epoch": 0.73,
"grad_norm": 0.9018495082855225,
"learning_rate": 1.7541252681910386e-06,
"loss": 0.5914,
"step": 11572
},
{
"epoch": 0.73,
"grad_norm": 0.8656198382377625,
"learning_rate": 1.7533449255892986e-06,
"loss": 0.5308,
"step": 11573
},
{
"epoch": 0.73,
"grad_norm": 0.9031473994255066,
"learning_rate": 1.7525647196929079e-06,
"loss": 0.5924,
"step": 11574
},
{
"epoch": 0.73,
"grad_norm": 0.966624915599823,
"learning_rate": 1.7517846505347197e-06,
"loss": 0.5615,
"step": 11575
},
{
"epoch": 0.73,
"grad_norm": 0.8910838961601257,
"learning_rate": 1.751004718147582e-06,
"loss": 0.5599,
"step": 11576
},
{
"epoch": 0.73,
"grad_norm": 0.8924875855445862,
"learning_rate": 1.7502249225643291e-06,
"loss": 0.5545,
"step": 11577
},
{
"epoch": 0.73,
"grad_norm": 0.8473634123802185,
"learning_rate": 1.7494452638178039e-06,
"loss": 0.5356,
"step": 11578
},
{
"epoch": 0.73,
"grad_norm": 0.8900013566017151,
"learning_rate": 1.7486657419408287e-06,
"loss": 0.5623,
"step": 11579
},
{
"epoch": 0.73,
"grad_norm": 0.859286367893219,
"learning_rate": 1.7478863569662286e-06,
"loss": 0.5538,
"step": 11580
},
{
"epoch": 0.73,
"grad_norm": 0.9663856029510498,
"learning_rate": 1.7471071089268204e-06,
"loss": 0.5877,
"step": 11581
},
{
"epoch": 0.73,
"grad_norm": 0.9221107959747314,
"learning_rate": 1.7463279978554166e-06,
"loss": 0.5999,
"step": 11582
},
{
"epoch": 0.73,
"grad_norm": 0.8356893062591553,
"learning_rate": 1.745549023784821e-06,
"loss": 0.5129,
"step": 11583
},
{
"epoch": 0.73,
"grad_norm": 0.9485192894935608,
"learning_rate": 1.7447701867478372e-06,
"loss": 0.5601,
"step": 11584
},
{
"epoch": 0.73,
"grad_norm": 0.9047239422798157,
"learning_rate": 1.7439914867772529e-06,
"loss": 0.5176,
"step": 11585
},
{
"epoch": 0.73,
"grad_norm": 0.9019331932067871,
"learning_rate": 1.7432129239058637e-06,
"loss": 0.5698,
"step": 11586
},
{
"epoch": 0.73,
"grad_norm": 0.8926165699958801,
"learning_rate": 1.7424344981664475e-06,
"loss": 0.6147,
"step": 11587
},
{
"epoch": 0.73,
"grad_norm": 0.9237696528434753,
"learning_rate": 1.7416562095917822e-06,
"loss": 0.5531,
"step": 11588
},
{
"epoch": 0.73,
"grad_norm": 0.8881582021713257,
"learning_rate": 1.7408780582146383e-06,
"loss": 0.5981,
"step": 11589
},
{
"epoch": 0.73,
"grad_norm": 0.8784075975418091,
"learning_rate": 1.7401000440677824e-06,
"loss": 0.5443,
"step": 11590
},
{
"epoch": 0.73,
"grad_norm": 0.8897961974143982,
"learning_rate": 1.7393221671839727e-06,
"loss": 0.5622,
"step": 11591
},
{
"epoch": 0.73,
"grad_norm": 0.9459214806556702,
"learning_rate": 1.7385444275959657e-06,
"loss": 0.5154,
"step": 11592
},
{
"epoch": 0.73,
"grad_norm": 0.8417472839355469,
"learning_rate": 1.7377668253365054e-06,
"loss": 0.5909,
"step": 11593
},
{
"epoch": 0.73,
"grad_norm": 0.8860768675804138,
"learning_rate": 1.7369893604383353e-06,
"loss": 0.5721,
"step": 11594
},
{
"epoch": 0.73,
"grad_norm": 0.8790547251701355,
"learning_rate": 1.736212032934192e-06,
"loss": 0.5747,
"step": 11595
},
{
"epoch": 0.73,
"grad_norm": 0.8245856165885925,
"learning_rate": 1.7354348428568063e-06,
"loss": 0.5412,
"step": 11596
},
{
"epoch": 0.73,
"grad_norm": 0.9556723237037659,
"learning_rate": 1.7346577902389028e-06,
"loss": 0.5856,
"step": 11597
},
{
"epoch": 0.73,
"grad_norm": 0.8890882730484009,
"learning_rate": 1.7338808751132002e-06,
"loss": 0.564,
"step": 11598
},
{
"epoch": 0.73,
"grad_norm": 0.8770986795425415,
"learning_rate": 1.7331040975124125e-06,
"loss": 0.5536,
"step": 11599
},
{
"epoch": 0.73,
"grad_norm": 0.8428150415420532,
"learning_rate": 1.7323274574692479e-06,
"loss": 0.529,
"step": 11600
},
{
"epoch": 0.73,
"grad_norm": 0.9159516096115112,
"learning_rate": 1.7315509550164044e-06,
"loss": 0.5696,
"step": 11601
},
{
"epoch": 0.74,
"grad_norm": 0.8555203676223755,
"learning_rate": 1.730774590186579e-06,
"loss": 0.6464,
"step": 11602
},
{
"epoch": 0.74,
"grad_norm": 0.911897599697113,
"learning_rate": 1.7299983630124663e-06,
"loss": 0.5728,
"step": 11603
},
{
"epoch": 0.74,
"grad_norm": 0.8873314261436462,
"learning_rate": 1.729222273526745e-06,
"loss": 0.5747,
"step": 11604
},
{
"epoch": 0.74,
"grad_norm": 0.8664464354515076,
"learning_rate": 1.7284463217620955e-06,
"loss": 0.551,
"step": 11605
},
{
"epoch": 0.74,
"grad_norm": 0.9194732308387756,
"learning_rate": 1.727670507751193e-06,
"loss": 0.5568,
"step": 11606
},
{
"epoch": 0.74,
"grad_norm": 0.9157373905181885,
"learning_rate": 1.7268948315266975e-06,
"loss": 0.6275,
"step": 11607
},
{
"epoch": 0.74,
"grad_norm": 0.9254802465438843,
"learning_rate": 1.7261192931212783e-06,
"loss": 0.604,
"step": 11608
},
{
"epoch": 0.74,
"grad_norm": 0.870588481426239,
"learning_rate": 1.7253438925675847e-06,
"loss": 0.526,
"step": 11609
},
{
"epoch": 0.74,
"grad_norm": 0.9618417024612427,
"learning_rate": 1.7245686298982678e-06,
"loss": 0.6359,
"step": 11610
},
{
"epoch": 0.74,
"grad_norm": 0.9000369310379028,
"learning_rate": 1.723793505145972e-06,
"loss": 0.6009,
"step": 11611
},
{
"epoch": 0.74,
"grad_norm": 0.8883331418037415,
"learning_rate": 1.7230185183433345e-06,
"loss": 0.5614,
"step": 11612
},
{
"epoch": 0.74,
"grad_norm": 0.8387264609336853,
"learning_rate": 1.722243669522987e-06,
"loss": 0.5603,
"step": 11613
},
{
"epoch": 0.74,
"grad_norm": 0.8569300770759583,
"learning_rate": 1.7214689587175582e-06,
"loss": 0.6069,
"step": 11614
},
{
"epoch": 0.74,
"grad_norm": 0.8399550318717957,
"learning_rate": 1.720694385959663e-06,
"loss": 0.5225,
"step": 11615
},
{
"epoch": 0.74,
"grad_norm": 0.8741680979728699,
"learning_rate": 1.7199199512819225e-06,
"loss": 0.5591,
"step": 11616
},
{
"epoch": 0.74,
"grad_norm": 0.9207227826118469,
"learning_rate": 1.7191456547169405e-06,
"loss": 0.6151,
"step": 11617
},
{
"epoch": 0.74,
"grad_norm": 0.8906126022338867,
"learning_rate": 1.718371496297322e-06,
"loss": 0.606,
"step": 11618
},
{
"epoch": 0.74,
"grad_norm": 0.9442402720451355,
"learning_rate": 1.717597476055664e-06,
"loss": 0.6094,
"step": 11619
},
{
"epoch": 0.74,
"grad_norm": 0.9012939929962158,
"learning_rate": 1.716823594024557e-06,
"loss": 0.5826,
"step": 11620
},
{
"epoch": 0.74,
"grad_norm": 0.880403995513916,
"learning_rate": 1.716049850236588e-06,
"loss": 0.5725,
"step": 11621
},
{
"epoch": 0.74,
"grad_norm": 0.9011920690536499,
"learning_rate": 1.7152762447243365e-06,
"loss": 0.5993,
"step": 11622
},
{
"epoch": 0.74,
"grad_norm": 0.8702940940856934,
"learning_rate": 1.7145027775203748e-06,
"loss": 0.5512,
"step": 11623
},
{
"epoch": 0.74,
"grad_norm": 0.8984467387199402,
"learning_rate": 1.7137294486572714e-06,
"loss": 0.5759,
"step": 11624
},
{
"epoch": 0.74,
"grad_norm": 0.9334822297096252,
"learning_rate": 1.7129562581675885e-06,
"loss": 0.5788,
"step": 11625
},
{
"epoch": 0.74,
"grad_norm": 0.8600862622261047,
"learning_rate": 1.7121832060838833e-06,
"loss": 0.5691,
"step": 11626
},
{
"epoch": 0.74,
"grad_norm": 0.9075511693954468,
"learning_rate": 1.711410292438707e-06,
"loss": 0.6158,
"step": 11627
},
{
"epoch": 0.74,
"grad_norm": 0.8380544185638428,
"learning_rate": 1.7106375172646e-06,
"loss": 0.5239,
"step": 11628
},
{
"epoch": 0.74,
"grad_norm": 0.8987744450569153,
"learning_rate": 1.709864880594106e-06,
"loss": 0.5677,
"step": 11629
},
{
"epoch": 0.74,
"grad_norm": 0.8771459460258484,
"learning_rate": 1.7090923824597578e-06,
"loss": 0.6342,
"step": 11630
},
{
"epoch": 0.74,
"grad_norm": 0.9104797840118408,
"learning_rate": 1.70832002289408e-06,
"loss": 0.6225,
"step": 11631
},
{
"epoch": 0.74,
"grad_norm": 0.8693386316299438,
"learning_rate": 1.7075478019295943e-06,
"loss": 0.5796,
"step": 11632
},
{
"epoch": 0.74,
"grad_norm": 0.8541246056556702,
"learning_rate": 1.7067757195988178e-06,
"loss": 0.5259,
"step": 11633
},
{
"epoch": 0.74,
"grad_norm": 0.9266880750656128,
"learning_rate": 1.706003775934259e-06,
"loss": 0.5968,
"step": 11634
},
{
"epoch": 0.74,
"grad_norm": 0.8783169388771057,
"learning_rate": 1.705231970968424e-06,
"loss": 0.5978,
"step": 11635
},
{
"epoch": 0.74,
"grad_norm": 0.9030970335006714,
"learning_rate": 1.704460304733806e-06,
"loss": 0.6016,
"step": 11636
},
{
"epoch": 0.74,
"grad_norm": 0.9360423684120178,
"learning_rate": 1.7036887772629012e-06,
"loss": 0.6177,
"step": 11637
},
{
"epoch": 0.74,
"grad_norm": 0.8983248472213745,
"learning_rate": 1.7029173885881973e-06,
"loss": 0.5724,
"step": 11638
},
{
"epoch": 0.74,
"grad_norm": 0.8801354765892029,
"learning_rate": 1.7021461387421705e-06,
"loss": 0.5717,
"step": 11639
},
{
"epoch": 0.74,
"grad_norm": 0.8851686120033264,
"learning_rate": 1.7013750277572977e-06,
"loss": 0.5777,
"step": 11640
},
{
"epoch": 0.74,
"grad_norm": 0.9012311100959778,
"learning_rate": 1.7006040556660468e-06,
"loss": 0.5995,
"step": 11641
},
{
"epoch": 0.74,
"grad_norm": 0.7989736199378967,
"learning_rate": 1.6998332225008817e-06,
"loss": 0.5581,
"step": 11642
},
{
"epoch": 0.74,
"grad_norm": 0.8802455067634583,
"learning_rate": 1.6990625282942607e-06,
"loss": 0.5862,
"step": 11643
},
{
"epoch": 0.74,
"grad_norm": 0.938679039478302,
"learning_rate": 1.6982919730786323e-06,
"loss": 0.5618,
"step": 11644
},
{
"epoch": 0.74,
"grad_norm": 0.9325195550918579,
"learning_rate": 1.697521556886441e-06,
"loss": 0.6066,
"step": 11645
},
{
"epoch": 0.74,
"grad_norm": 0.8584638237953186,
"learning_rate": 1.6967512797501317e-06,
"loss": 0.5666,
"step": 11646
},
{
"epoch": 0.74,
"grad_norm": 0.9101821780204773,
"learning_rate": 1.6959811417021338e-06,
"loss": 0.5435,
"step": 11647
},
{
"epoch": 0.74,
"grad_norm": 0.9440627694129944,
"learning_rate": 1.6952111427748758e-06,
"loss": 0.5705,
"step": 11648
},
{
"epoch": 0.74,
"grad_norm": 0.9121119379997253,
"learning_rate": 1.69444128300078e-06,
"loss": 0.538,
"step": 11649
},
{
"epoch": 0.74,
"grad_norm": 0.8702934980392456,
"learning_rate": 1.6936715624122623e-06,
"loss": 0.561,
"step": 11650
},
{
"epoch": 0.74,
"grad_norm": 0.872009813785553,
"learning_rate": 1.6929019810417352e-06,
"loss": 0.5535,
"step": 11651
},
{
"epoch": 0.74,
"grad_norm": 0.8676707744598389,
"learning_rate": 1.6921325389215993e-06,
"loss": 0.5736,
"step": 11652
},
{
"epoch": 0.74,
"grad_norm": 0.8695118427276611,
"learning_rate": 1.6913632360842553e-06,
"loss": 0.5864,
"step": 11653
},
{
"epoch": 0.74,
"grad_norm": 0.8931376934051514,
"learning_rate": 1.6905940725620951e-06,
"loss": 0.5808,
"step": 11654
},
{
"epoch": 0.74,
"grad_norm": 0.9246284365653992,
"learning_rate": 1.6898250483875063e-06,
"loss": 0.6203,
"step": 11655
},
{
"epoch": 0.74,
"grad_norm": 0.8424333333969116,
"learning_rate": 1.6890561635928692e-06,
"loss": 0.5167,
"step": 11656
},
{
"epoch": 0.74,
"grad_norm": 0.8894586563110352,
"learning_rate": 1.6882874182105613e-06,
"loss": 0.5479,
"step": 11657
},
{
"epoch": 0.74,
"grad_norm": 0.8806304931640625,
"learning_rate": 1.6875188122729458e-06,
"loss": 0.5519,
"step": 11658
},
{
"epoch": 0.74,
"grad_norm": 0.8468473553657532,
"learning_rate": 1.6867503458123913e-06,
"loss": 0.5492,
"step": 11659
},
{
"epoch": 0.74,
"grad_norm": 0.8903117775917053,
"learning_rate": 1.6859820188612557e-06,
"loss": 0.5855,
"step": 11660
},
{
"epoch": 0.74,
"grad_norm": 0.8897051215171814,
"learning_rate": 1.6852138314518873e-06,
"loss": 0.5357,
"step": 11661
},
{
"epoch": 0.74,
"grad_norm": 0.9220659732818604,
"learning_rate": 1.6844457836166329e-06,
"loss": 0.5354,
"step": 11662
},
{
"epoch": 0.74,
"grad_norm": 0.9147717356681824,
"learning_rate": 1.6836778753878324e-06,
"loss": 0.5965,
"step": 11663
},
{
"epoch": 0.74,
"grad_norm": 0.9581725597381592,
"learning_rate": 1.68291010679782e-06,
"loss": 0.556,
"step": 11664
},
{
"epoch": 0.74,
"grad_norm": 0.8362496495246887,
"learning_rate": 1.6821424778789252e-06,
"loss": 0.556,
"step": 11665
},
{
"epoch": 0.74,
"grad_norm": 0.8997658491134644,
"learning_rate": 1.6813749886634657e-06,
"loss": 0.5754,
"step": 11666
},
{
"epoch": 0.74,
"grad_norm": 0.8712424039840698,
"learning_rate": 1.6806076391837622e-06,
"loss": 0.5229,
"step": 11667
},
{
"epoch": 0.74,
"grad_norm": 0.9129472374916077,
"learning_rate": 1.6798404294721254e-06,
"loss": 0.5505,
"step": 11668
},
{
"epoch": 0.74,
"grad_norm": 0.8357523083686829,
"learning_rate": 1.6790733595608567e-06,
"loss": 0.5906,
"step": 11669
},
{
"epoch": 0.74,
"grad_norm": 0.9335298538208008,
"learning_rate": 1.6783064294822559e-06,
"loss": 0.5469,
"step": 11670
},
{
"epoch": 0.74,
"grad_norm": 0.8826762437820435,
"learning_rate": 1.677539639268616e-06,
"loss": 0.5969,
"step": 11671
},
{
"epoch": 0.74,
"grad_norm": 0.8778190612792969,
"learning_rate": 1.6767729889522239e-06,
"loss": 0.5918,
"step": 11672
},
{
"epoch": 0.74,
"grad_norm": 0.9307227730751038,
"learning_rate": 1.6760064785653624e-06,
"loss": 0.593,
"step": 11673
},
{
"epoch": 0.74,
"grad_norm": 0.9407718181610107,
"learning_rate": 1.675240108140303e-06,
"loss": 0.5833,
"step": 11674
},
{
"epoch": 0.74,
"grad_norm": 0.8945533633232117,
"learning_rate": 1.674473877709315e-06,
"loss": 0.5462,
"step": 11675
},
{
"epoch": 0.74,
"grad_norm": 0.9327276945114136,
"learning_rate": 1.6737077873046669e-06,
"loss": 0.6013,
"step": 11676
},
{
"epoch": 0.74,
"grad_norm": 0.9071036577224731,
"learning_rate": 1.672941836958611e-06,
"loss": 0.5889,
"step": 11677
},
{
"epoch": 0.74,
"grad_norm": 0.9192063212394714,
"learning_rate": 1.6721760267033998e-06,
"loss": 0.5819,
"step": 11678
},
{
"epoch": 0.74,
"grad_norm": 0.8941338062286377,
"learning_rate": 1.6714103565712798e-06,
"loss": 0.5678,
"step": 11679
},
{
"epoch": 0.74,
"grad_norm": 0.8962938785552979,
"learning_rate": 1.6706448265944902e-06,
"loss": 0.5746,
"step": 11680
},
{
"epoch": 0.74,
"grad_norm": 0.8910273313522339,
"learning_rate": 1.6698794368052669e-06,
"loss": 0.5355,
"step": 11681
},
{
"epoch": 0.74,
"grad_norm": 0.872856855392456,
"learning_rate": 1.6691141872358336e-06,
"loss": 0.5956,
"step": 11682
},
{
"epoch": 0.74,
"grad_norm": 0.8627503514289856,
"learning_rate": 1.668349077918413e-06,
"loss": 0.5305,
"step": 11683
},
{
"epoch": 0.74,
"grad_norm": 0.9181475043296814,
"learning_rate": 1.6675841088852268e-06,
"loss": 0.5133,
"step": 11684
},
{
"epoch": 0.74,
"grad_norm": 0.8350986242294312,
"learning_rate": 1.666819280168479e-06,
"loss": 0.5133,
"step": 11685
},
{
"epoch": 0.74,
"grad_norm": 0.8253143429756165,
"learning_rate": 1.6660545918003762e-06,
"loss": 0.5165,
"step": 11686
},
{
"epoch": 0.74,
"grad_norm": 0.9417761564254761,
"learning_rate": 1.6652900438131181e-06,
"loss": 0.6201,
"step": 11687
},
{
"epoch": 0.74,
"grad_norm": 0.9664661288261414,
"learning_rate": 1.6645256362388922e-06,
"loss": 0.569,
"step": 11688
},
{
"epoch": 0.74,
"grad_norm": 0.9326826333999634,
"learning_rate": 1.663761369109892e-06,
"loss": 0.6134,
"step": 11689
},
{
"epoch": 0.74,
"grad_norm": 0.9516881108283997,
"learning_rate": 1.662997242458293e-06,
"loss": 0.5727,
"step": 11690
},
{
"epoch": 0.74,
"grad_norm": 0.9566120505332947,
"learning_rate": 1.6622332563162714e-06,
"loss": 0.6568,
"step": 11691
},
{
"epoch": 0.74,
"grad_norm": 0.838861882686615,
"learning_rate": 1.6614694107159962e-06,
"loss": 0.5633,
"step": 11692
},
{
"epoch": 0.74,
"grad_norm": 0.8725398182868958,
"learning_rate": 1.6607057056896304e-06,
"loss": 0.537,
"step": 11693
},
{
"epoch": 0.74,
"grad_norm": 0.8720897436141968,
"learning_rate": 1.6599421412693307e-06,
"loss": 0.5786,
"step": 11694
},
{
"epoch": 0.74,
"grad_norm": 0.8999429941177368,
"learning_rate": 1.65917871748725e-06,
"loss": 0.5867,
"step": 11695
},
{
"epoch": 0.74,
"grad_norm": 0.9211562275886536,
"learning_rate": 1.6584154343755276e-06,
"loss": 0.5425,
"step": 11696
},
{
"epoch": 0.74,
"grad_norm": 0.856549859046936,
"learning_rate": 1.6576522919663107e-06,
"loss": 0.5257,
"step": 11697
},
{
"epoch": 0.74,
"grad_norm": 0.88596510887146,
"learning_rate": 1.6568892902917267e-06,
"loss": 0.564,
"step": 11698
},
{
"epoch": 0.74,
"grad_norm": 0.8725159168243408,
"learning_rate": 1.6561264293839051e-06,
"loss": 0.5379,
"step": 11699
},
{
"epoch": 0.74,
"grad_norm": 0.9307702779769897,
"learning_rate": 1.6553637092749685e-06,
"loss": 0.5769,
"step": 11700
},
{
"epoch": 0.74,
"grad_norm": 0.8692091107368469,
"learning_rate": 1.6546011299970276e-06,
"loss": 0.5311,
"step": 11701
},
{
"epoch": 0.74,
"grad_norm": 0.8620391488075256,
"learning_rate": 1.6538386915821975e-06,
"loss": 0.5296,
"step": 11702
},
{
"epoch": 0.74,
"grad_norm": 0.8763535022735596,
"learning_rate": 1.6530763940625805e-06,
"loss": 0.5436,
"step": 11703
},
{
"epoch": 0.74,
"grad_norm": 0.970310628414154,
"learning_rate": 1.6523142374702722e-06,
"loss": 0.607,
"step": 11704
},
{
"epoch": 0.74,
"grad_norm": 0.9026583433151245,
"learning_rate": 1.6515522218373658e-06,
"loss": 0.5318,
"step": 11705
},
{
"epoch": 0.74,
"grad_norm": 0.9070267081260681,
"learning_rate": 1.6507903471959468e-06,
"loss": 0.5649,
"step": 11706
},
{
"epoch": 0.74,
"grad_norm": 0.9056694507598877,
"learning_rate": 1.6500286135780951e-06,
"loss": 0.6071,
"step": 11707
},
{
"epoch": 0.74,
"grad_norm": 0.8863142132759094,
"learning_rate": 1.6492670210158863e-06,
"loss": 0.5855,
"step": 11708
},
{
"epoch": 0.74,
"grad_norm": 0.8576910495758057,
"learning_rate": 1.6485055695413838e-06,
"loss": 0.5702,
"step": 11709
},
{
"epoch": 0.74,
"grad_norm": 0.9119299650192261,
"learning_rate": 1.6477442591866544e-06,
"loss": 0.5564,
"step": 11710
},
{
"epoch": 0.74,
"grad_norm": 0.9486945867538452,
"learning_rate": 1.6469830899837547e-06,
"loss": 0.5378,
"step": 11711
},
{
"epoch": 0.74,
"grad_norm": 0.8819604516029358,
"learning_rate": 1.6462220619647306e-06,
"loss": 0.5903,
"step": 11712
},
{
"epoch": 0.74,
"grad_norm": 0.8723688125610352,
"learning_rate": 1.6454611751616283e-06,
"loss": 0.5676,
"step": 11713
},
{
"epoch": 0.74,
"grad_norm": 0.8502789735794067,
"learning_rate": 1.6447004296064867e-06,
"loss": 0.543,
"step": 11714
},
{
"epoch": 0.74,
"grad_norm": 0.8809540867805481,
"learning_rate": 1.6439398253313377e-06,
"loss": 0.5899,
"step": 11715
},
{
"epoch": 0.74,
"grad_norm": 0.8517667055130005,
"learning_rate": 1.6431793623682096e-06,
"loss": 0.5581,
"step": 11716
},
{
"epoch": 0.74,
"grad_norm": 0.9315950870513916,
"learning_rate": 1.642419040749119e-06,
"loss": 0.6287,
"step": 11717
},
{
"epoch": 0.74,
"grad_norm": 0.9184224605560303,
"learning_rate": 1.6416588605060812e-06,
"loss": 0.5907,
"step": 11718
},
{
"epoch": 0.74,
"grad_norm": 0.9085866808891296,
"learning_rate": 1.6408988216711092e-06,
"loss": 0.5375,
"step": 11719
},
{
"epoch": 0.74,
"grad_norm": 0.8682625889778137,
"learning_rate": 1.6401389242762006e-06,
"loss": 0.5492,
"step": 11720
},
{
"epoch": 0.74,
"grad_norm": 0.871749997138977,
"learning_rate": 1.639379168353354e-06,
"loss": 0.5566,
"step": 11721
},
{
"epoch": 0.74,
"grad_norm": 0.8846398591995239,
"learning_rate": 1.6386195539345596e-06,
"loss": 0.566,
"step": 11722
},
{
"epoch": 0.74,
"grad_norm": 0.8326940536499023,
"learning_rate": 1.6378600810518026e-06,
"loss": 0.6035,
"step": 11723
},
{
"epoch": 0.74,
"grad_norm": 0.8957687616348267,
"learning_rate": 1.6371007497370612e-06,
"loss": 0.5666,
"step": 11724
},
{
"epoch": 0.74,
"grad_norm": 0.9184751510620117,
"learning_rate": 1.6363415600223103e-06,
"loss": 0.6243,
"step": 11725
},
{
"epoch": 0.74,
"grad_norm": 0.8690382838249207,
"learning_rate": 1.6355825119395118e-06,
"loss": 0.5514,
"step": 11726
},
{
"epoch": 0.74,
"grad_norm": 0.8801531791687012,
"learning_rate": 1.634823605520633e-06,
"loss": 0.5549,
"step": 11727
},
{
"epoch": 0.74,
"grad_norm": 0.8974312543869019,
"learning_rate": 1.634064840797624e-06,
"loss": 0.5657,
"step": 11728
},
{
"epoch": 0.74,
"grad_norm": 0.8583878874778748,
"learning_rate": 1.6333062178024355e-06,
"loss": 0.5825,
"step": 11729
},
{
"epoch": 0.74,
"grad_norm": 0.8436487913131714,
"learning_rate": 1.63254773656701e-06,
"loss": 0.5514,
"step": 11730
},
{
"epoch": 0.74,
"grad_norm": 0.8887004852294922,
"learning_rate": 1.6317893971232852e-06,
"loss": 0.5557,
"step": 11731
},
{
"epoch": 0.74,
"grad_norm": 0.9396257400512695,
"learning_rate": 1.6310311995031913e-06,
"loss": 0.5989,
"step": 11732
},
{
"epoch": 0.74,
"grad_norm": 0.9944149851799011,
"learning_rate": 1.6302731437386555e-06,
"loss": 0.6509,
"step": 11733
},
{
"epoch": 0.74,
"grad_norm": 0.8768121600151062,
"learning_rate": 1.6295152298615936e-06,
"loss": 0.5258,
"step": 11734
},
{
"epoch": 0.74,
"grad_norm": 0.9114717245101929,
"learning_rate": 1.62875745790392e-06,
"loss": 0.5883,
"step": 11735
},
{
"epoch": 0.74,
"grad_norm": 0.9182329177856445,
"learning_rate": 1.6279998278975428e-06,
"loss": 0.6177,
"step": 11736
},
{
"epoch": 0.74,
"grad_norm": 0.8736885190010071,
"learning_rate": 1.627242339874362e-06,
"loss": 0.5619,
"step": 11737
},
{
"epoch": 0.74,
"grad_norm": 0.8888165950775146,
"learning_rate": 1.6264849938662753e-06,
"loss": 0.6056,
"step": 11738
},
{
"epoch": 0.74,
"grad_norm": 0.9017614126205444,
"learning_rate": 1.6257277899051666e-06,
"loss": 0.6036,
"step": 11739
},
{
"epoch": 0.74,
"grad_norm": 0.9094336628913879,
"learning_rate": 1.6249707280229237e-06,
"loss": 0.6252,
"step": 11740
},
{
"epoch": 0.74,
"grad_norm": 0.8804279565811157,
"learning_rate": 1.6242138082514247e-06,
"loss": 0.6023,
"step": 11741
},
{
"epoch": 0.74,
"grad_norm": 0.8932421207427979,
"learning_rate": 1.6234570306225366e-06,
"loss": 0.5898,
"step": 11742
},
{
"epoch": 0.74,
"grad_norm": 0.8643161058425903,
"learning_rate": 1.6227003951681276e-06,
"loss": 0.5163,
"step": 11743
},
{
"epoch": 0.74,
"grad_norm": 0.8888043165206909,
"learning_rate": 1.6219439019200557e-06,
"loss": 0.5626,
"step": 11744
},
{
"epoch": 0.74,
"grad_norm": 0.9490756988525391,
"learning_rate": 1.6211875509101744e-06,
"loss": 0.6331,
"step": 11745
},
{
"epoch": 0.74,
"grad_norm": 0.8775157928466797,
"learning_rate": 1.6204313421703332e-06,
"loss": 0.5488,
"step": 11746
},
{
"epoch": 0.74,
"grad_norm": 0.9012529850006104,
"learning_rate": 1.6196752757323698e-06,
"loss": 0.6517,
"step": 11747
},
{
"epoch": 0.74,
"grad_norm": 0.8449646830558777,
"learning_rate": 1.61891935162812e-06,
"loss": 0.5482,
"step": 11748
},
{
"epoch": 0.74,
"grad_norm": 0.8353961110115051,
"learning_rate": 1.6181635698894171e-06,
"loss": 0.5155,
"step": 11749
},
{
"epoch": 0.74,
"grad_norm": 0.9023754596710205,
"learning_rate": 1.61740793054808e-06,
"loss": 0.6113,
"step": 11750
},
{
"epoch": 0.74,
"grad_norm": 0.8526588082313538,
"learning_rate": 1.6166524336359285e-06,
"loss": 0.537,
"step": 11751
},
{
"epoch": 0.74,
"grad_norm": 0.9162303805351257,
"learning_rate": 1.6158970791847728e-06,
"loss": 0.6146,
"step": 11752
},
{
"epoch": 0.74,
"grad_norm": 0.8880906701087952,
"learning_rate": 1.6151418672264186e-06,
"loss": 0.5587,
"step": 11753
},
{
"epoch": 0.74,
"grad_norm": 0.8889377117156982,
"learning_rate": 1.614386797792667e-06,
"loss": 0.5929,
"step": 11754
},
{
"epoch": 0.74,
"grad_norm": 0.817284882068634,
"learning_rate": 1.6136318709153075e-06,
"loss": 0.5572,
"step": 11755
},
{
"epoch": 0.74,
"grad_norm": 0.785580039024353,
"learning_rate": 1.612877086626129e-06,
"loss": 0.6277,
"step": 11756
},
{
"epoch": 0.74,
"grad_norm": 0.8508361577987671,
"learning_rate": 1.612122444956916e-06,
"loss": 0.5669,
"step": 11757
},
{
"epoch": 0.74,
"grad_norm": 0.8702815175056458,
"learning_rate": 1.6113679459394398e-06,
"loss": 0.5907,
"step": 11758
},
{
"epoch": 0.74,
"grad_norm": 0.8659467697143555,
"learning_rate": 1.6106135896054714e-06,
"loss": 0.574,
"step": 11759
},
{
"epoch": 0.75,
"grad_norm": 0.8882265686988831,
"learning_rate": 1.6098593759867736e-06,
"loss": 0.5649,
"step": 11760
},
{
"epoch": 0.75,
"grad_norm": 0.8908340334892273,
"learning_rate": 1.609105305115104e-06,
"loss": 0.5496,
"step": 11761
},
{
"epoch": 0.75,
"grad_norm": 0.8687838315963745,
"learning_rate": 1.6083513770222158e-06,
"loss": 0.5963,
"step": 11762
},
{
"epoch": 0.75,
"grad_norm": 0.893989622592926,
"learning_rate": 1.6075975917398512e-06,
"loss": 0.5502,
"step": 11763
},
{
"epoch": 0.75,
"grad_norm": 0.8989611864089966,
"learning_rate": 1.60684394929975e-06,
"loss": 0.6383,
"step": 11764
},
{
"epoch": 0.75,
"grad_norm": 0.8593994975090027,
"learning_rate": 1.6060904497336465e-06,
"loss": 0.5928,
"step": 11765
},
{
"epoch": 0.75,
"grad_norm": 0.8752898573875427,
"learning_rate": 1.6053370930732676e-06,
"loss": 0.5127,
"step": 11766
},
{
"epoch": 0.75,
"grad_norm": 0.8868995904922485,
"learning_rate": 1.6045838793503342e-06,
"loss": 0.5745,
"step": 11767
},
{
"epoch": 0.75,
"grad_norm": 0.872316837310791,
"learning_rate": 1.6038308085965642e-06,
"loss": 0.5879,
"step": 11768
},
{
"epoch": 0.75,
"grad_norm": 0.9360784888267517,
"learning_rate": 1.6030778808436609e-06,
"loss": 0.5412,
"step": 11769
},
{
"epoch": 0.75,
"grad_norm": 0.8613805174827576,
"learning_rate": 1.6023250961233338e-06,
"loss": 0.5846,
"step": 11770
},
{
"epoch": 0.75,
"grad_norm": 0.9219672083854675,
"learning_rate": 1.6015724544672762e-06,
"loss": 0.5664,
"step": 11771
},
{
"epoch": 0.75,
"grad_norm": 0.9142691493034363,
"learning_rate": 1.6008199559071795e-06,
"loss": 0.602,
"step": 11772
},
{
"epoch": 0.75,
"grad_norm": 0.8564930558204651,
"learning_rate": 1.6000676004747306e-06,
"loss": 0.5597,
"step": 11773
},
{
"epoch": 0.75,
"grad_norm": 0.9067575335502625,
"learning_rate": 1.5993153882016065e-06,
"loss": 0.6287,
"step": 11774
},
{
"epoch": 0.75,
"grad_norm": 0.8603774309158325,
"learning_rate": 1.5985633191194821e-06,
"loss": 0.6032,
"step": 11775
},
{
"epoch": 0.75,
"grad_norm": 0.9035540819168091,
"learning_rate": 1.5978113932600248e-06,
"loss": 0.5868,
"step": 11776
},
{
"epoch": 0.75,
"grad_norm": 0.9324126839637756,
"learning_rate": 1.5970596106548913e-06,
"loss": 0.5675,
"step": 11777
},
{
"epoch": 0.75,
"grad_norm": 0.8501653671264648,
"learning_rate": 1.5963079713357432e-06,
"loss": 0.5974,
"step": 11778
},
{
"epoch": 0.75,
"grad_norm": 0.8585829734802246,
"learning_rate": 1.595556475334224e-06,
"loss": 0.5757,
"step": 11779
},
{
"epoch": 0.75,
"grad_norm": 0.8865067362785339,
"learning_rate": 1.5948051226819783e-06,
"loss": 0.5329,
"step": 11780
},
{
"epoch": 0.75,
"grad_norm": 0.8885084986686707,
"learning_rate": 1.5940539134106442e-06,
"loss": 0.5593,
"step": 11781
},
{
"epoch": 0.75,
"grad_norm": 0.8946758508682251,
"learning_rate": 1.5933028475518486e-06,
"loss": 0.5682,
"step": 11782
},
{
"epoch": 0.75,
"grad_norm": 0.9001892805099487,
"learning_rate": 1.5925519251372212e-06,
"loss": 0.4956,
"step": 11783
},
{
"epoch": 0.75,
"grad_norm": 0.8397232890129089,
"learning_rate": 1.5918011461983796e-06,
"loss": 0.5367,
"step": 11784
},
{
"epoch": 0.75,
"grad_norm": 0.9217719435691833,
"learning_rate": 1.5910505107669339e-06,
"loss": 0.5639,
"step": 11785
},
{
"epoch": 0.75,
"grad_norm": 0.8915478587150574,
"learning_rate": 1.5903000188744922e-06,
"loss": 0.5673,
"step": 11786
},
{
"epoch": 0.75,
"grad_norm": 0.8859308362007141,
"learning_rate": 1.589549670552656e-06,
"loss": 0.589,
"step": 11787
},
{
"epoch": 0.75,
"grad_norm": 0.8340456485748291,
"learning_rate": 1.588799465833018e-06,
"loss": 0.5375,
"step": 11788
},
{
"epoch": 0.75,
"grad_norm": 0.950278639793396,
"learning_rate": 1.5880494047471683e-06,
"loss": 0.6206,
"step": 11789
},
{
"epoch": 0.75,
"grad_norm": 0.8300553560256958,
"learning_rate": 1.587299487326689e-06,
"loss": 0.5973,
"step": 11790
},
{
"epoch": 0.75,
"grad_norm": 0.9166631698608398,
"learning_rate": 1.586549713603156e-06,
"loss": 0.6043,
"step": 11791
},
{
"epoch": 0.75,
"grad_norm": 0.8950029611587524,
"learning_rate": 1.5858000836081422e-06,
"loss": 0.5707,
"step": 11792
},
{
"epoch": 0.75,
"grad_norm": 0.9038580060005188,
"learning_rate": 1.5850505973732077e-06,
"loss": 0.5714,
"step": 11793
},
{
"epoch": 0.75,
"grad_norm": 0.8897790908813477,
"learning_rate": 1.5843012549299131e-06,
"loss": 0.5668,
"step": 11794
},
{
"epoch": 0.75,
"grad_norm": 0.9130045175552368,
"learning_rate": 1.58355205630981e-06,
"loss": 0.6087,
"step": 11795
},
{
"epoch": 0.75,
"grad_norm": 0.8878775835037231,
"learning_rate": 1.5828030015444451e-06,
"loss": 0.5306,
"step": 11796
},
{
"epoch": 0.75,
"grad_norm": 0.8759022951126099,
"learning_rate": 1.5820540906653581e-06,
"loss": 0.6105,
"step": 11797
},
{
"epoch": 0.75,
"grad_norm": 0.8387483358383179,
"learning_rate": 1.5813053237040849e-06,
"loss": 0.565,
"step": 11798
},
{
"epoch": 0.75,
"grad_norm": 0.8774323463439941,
"learning_rate": 1.580556700692148e-06,
"loss": 0.5536,
"step": 11799
},
{
"epoch": 0.75,
"grad_norm": 0.9281049370765686,
"learning_rate": 1.5798082216610766e-06,
"loss": 0.5497,
"step": 11800
},
{
"epoch": 0.75,
"grad_norm": 0.8957639932632446,
"learning_rate": 1.5790598866423818e-06,
"loss": 0.6225,
"step": 11801
},
{
"epoch": 0.75,
"grad_norm": 0.8487939834594727,
"learning_rate": 1.5783116956675742e-06,
"loss": 0.5618,
"step": 11802
},
{
"epoch": 0.75,
"grad_norm": 0.8974397778511047,
"learning_rate": 1.5775636487681579e-06,
"loss": 0.5703,
"step": 11803
},
{
"epoch": 0.75,
"grad_norm": 0.9455395340919495,
"learning_rate": 1.5768157459756307e-06,
"loss": 0.6104,
"step": 11804
},
{
"epoch": 0.75,
"grad_norm": 0.9057279825210571,
"learning_rate": 1.576067987321484e-06,
"loss": 0.553,
"step": 11805
},
{
"epoch": 0.75,
"grad_norm": 0.9172567129135132,
"learning_rate": 1.5753203728372052e-06,
"loss": 0.5335,
"step": 11806
},
{
"epoch": 0.75,
"grad_norm": 0.8080207705497742,
"learning_rate": 1.5745729025542684e-06,
"loss": 0.5155,
"step": 11807
},
{
"epoch": 0.75,
"grad_norm": 0.9321126937866211,
"learning_rate": 1.5738255765041537e-06,
"loss": 0.6143,
"step": 11808
},
{
"epoch": 0.75,
"grad_norm": 0.8731662631034851,
"learning_rate": 1.5730783947183237e-06,
"loss": 0.5733,
"step": 11809
},
{
"epoch": 0.75,
"grad_norm": 0.8786374926567078,
"learning_rate": 1.5723313572282412e-06,
"loss": 0.5162,
"step": 11810
},
{
"epoch": 0.75,
"grad_norm": 0.9051015973091125,
"learning_rate": 1.5715844640653627e-06,
"loss": 0.5836,
"step": 11811
},
{
"epoch": 0.75,
"grad_norm": 0.9382368922233582,
"learning_rate": 1.5708377152611326e-06,
"loss": 0.5949,
"step": 11812
},
{
"epoch": 0.75,
"grad_norm": 0.8949106931686401,
"learning_rate": 1.5700911108469986e-06,
"loss": 0.5708,
"step": 11813
},
{
"epoch": 0.75,
"grad_norm": 0.925713837146759,
"learning_rate": 1.569344650854398e-06,
"loss": 0.5473,
"step": 11814
},
{
"epoch": 0.75,
"grad_norm": 0.8979496955871582,
"learning_rate": 1.5685983353147582e-06,
"loss": 0.578,
"step": 11815
},
{
"epoch": 0.75,
"grad_norm": 0.8621270060539246,
"learning_rate": 1.5678521642595052e-06,
"loss": 0.5517,
"step": 11816
},
{
"epoch": 0.75,
"grad_norm": 0.8650081157684326,
"learning_rate": 1.567106137720058e-06,
"loss": 0.507,
"step": 11817
},
{
"epoch": 0.75,
"grad_norm": 0.9151085615158081,
"learning_rate": 1.5663602557278297e-06,
"loss": 0.565,
"step": 11818
},
{
"epoch": 0.75,
"grad_norm": 0.8362554311752319,
"learning_rate": 1.5656145183142274e-06,
"loss": 0.5517,
"step": 11819
},
{
"epoch": 0.75,
"grad_norm": 0.9083791971206665,
"learning_rate": 1.5648689255106474e-06,
"loss": 0.5975,
"step": 11820
},
{
"epoch": 0.75,
"grad_norm": 0.9755656123161316,
"learning_rate": 1.5641234773484887e-06,
"loss": 0.5784,
"step": 11821
},
{
"epoch": 0.75,
"grad_norm": 0.8774923086166382,
"learning_rate": 1.5633781738591392e-06,
"loss": 0.5766,
"step": 11822
},
{
"epoch": 0.75,
"grad_norm": 0.9168820977210999,
"learning_rate": 1.5626330150739776e-06,
"loss": 0.616,
"step": 11823
},
{
"epoch": 0.75,
"grad_norm": 0.8971782326698303,
"learning_rate": 1.5618880010243831e-06,
"loss": 0.5556,
"step": 11824
},
{
"epoch": 0.75,
"grad_norm": 0.9524270296096802,
"learning_rate": 1.5611431317417235e-06,
"loss": 0.5918,
"step": 11825
},
{
"epoch": 0.75,
"grad_norm": 0.9112175107002258,
"learning_rate": 1.5603984072573648e-06,
"loss": 0.5589,
"step": 11826
},
{
"epoch": 0.75,
"grad_norm": 0.856706440448761,
"learning_rate": 1.5596538276026641e-06,
"loss": 0.5309,
"step": 11827
},
{
"epoch": 0.75,
"grad_norm": 0.8865464329719543,
"learning_rate": 1.5589093928089715e-06,
"loss": 0.5807,
"step": 11828
},
{
"epoch": 0.75,
"grad_norm": 0.8657694458961487,
"learning_rate": 1.5581651029076322e-06,
"loss": 0.5807,
"step": 11829
},
{
"epoch": 0.75,
"grad_norm": 0.9261035919189453,
"learning_rate": 1.5574209579299903e-06,
"loss": 0.5876,
"step": 11830
},
{
"epoch": 0.75,
"grad_norm": 0.918413519859314,
"learning_rate": 1.5566769579073747e-06,
"loss": 0.5667,
"step": 11831
},
{
"epoch": 0.75,
"grad_norm": 0.9813733696937561,
"learning_rate": 1.555933102871114e-06,
"loss": 0.5733,
"step": 11832
},
{
"epoch": 0.75,
"grad_norm": 0.9484089016914368,
"learning_rate": 1.5551893928525285e-06,
"loss": 0.6259,
"step": 11833
},
{
"epoch": 0.75,
"grad_norm": 0.9082149267196655,
"learning_rate": 1.5544458278829344e-06,
"loss": 0.6183,
"step": 11834
},
{
"epoch": 0.75,
"grad_norm": 0.9003174304962158,
"learning_rate": 1.5537024079936425e-06,
"loss": 0.5506,
"step": 11835
},
{
"epoch": 0.75,
"grad_norm": 0.9653313755989075,
"learning_rate": 1.5529591332159511e-06,
"loss": 0.6133,
"step": 11836
},
{
"epoch": 0.75,
"grad_norm": 0.9120768904685974,
"learning_rate": 1.5522160035811578e-06,
"loss": 0.5768,
"step": 11837
},
{
"epoch": 0.75,
"grad_norm": 0.8551223278045654,
"learning_rate": 1.551473019120558e-06,
"loss": 0.5806,
"step": 11838
},
{
"epoch": 0.75,
"grad_norm": 0.9125446677207947,
"learning_rate": 1.5507301798654313e-06,
"loss": 0.5718,
"step": 11839
},
{
"epoch": 0.75,
"grad_norm": 0.8963059782981873,
"learning_rate": 1.549987485847057e-06,
"loss": 0.556,
"step": 11840
},
{
"epoch": 0.75,
"grad_norm": 0.8334496021270752,
"learning_rate": 1.54924493709671e-06,
"loss": 0.5308,
"step": 11841
},
{
"epoch": 0.75,
"grad_norm": 0.8776934742927551,
"learning_rate": 1.5485025336456511e-06,
"loss": 0.643,
"step": 11842
},
{
"epoch": 0.75,
"grad_norm": 0.8814354538917542,
"learning_rate": 1.547760275525147e-06,
"loss": 0.5543,
"step": 11843
},
{
"epoch": 0.75,
"grad_norm": 0.8887062072753906,
"learning_rate": 1.547018162766446e-06,
"loss": 0.6306,
"step": 11844
},
{
"epoch": 0.75,
"grad_norm": 0.9002584218978882,
"learning_rate": 1.5462761954007987e-06,
"loss": 0.5831,
"step": 11845
},
{
"epoch": 0.75,
"grad_norm": 0.9003365635871887,
"learning_rate": 1.5455343734594463e-06,
"loss": 0.5889,
"step": 11846
},
{
"epoch": 0.75,
"grad_norm": 0.8967679142951965,
"learning_rate": 1.5447926969736237e-06,
"loss": 0.599,
"step": 11847
},
{
"epoch": 0.75,
"grad_norm": 0.9296191334724426,
"learning_rate": 1.5440511659745611e-06,
"loss": 0.5842,
"step": 11848
},
{
"epoch": 0.75,
"grad_norm": 0.903057873249054,
"learning_rate": 1.5433097804934833e-06,
"loss": 0.5436,
"step": 11849
},
{
"epoch": 0.75,
"grad_norm": 0.9655782580375671,
"learning_rate": 1.5425685405616026e-06,
"loss": 0.603,
"step": 11850
},
{
"epoch": 0.75,
"grad_norm": 0.9100805521011353,
"learning_rate": 1.5418274462101358e-06,
"loss": 0.5269,
"step": 11851
},
{
"epoch": 0.75,
"grad_norm": 0.9555662274360657,
"learning_rate": 1.541086497470284e-06,
"loss": 0.5998,
"step": 11852
},
{
"epoch": 0.75,
"grad_norm": 0.9616516828536987,
"learning_rate": 1.540345694373247e-06,
"loss": 0.5653,
"step": 11853
},
{
"epoch": 0.75,
"grad_norm": 0.8906912207603455,
"learning_rate": 1.5396050369502175e-06,
"loss": 0.5835,
"step": 11854
},
{
"epoch": 0.75,
"grad_norm": 0.9743589162826538,
"learning_rate": 1.538864525232382e-06,
"loss": 0.5809,
"step": 11855
},
{
"epoch": 0.75,
"grad_norm": 0.9580129981040955,
"learning_rate": 1.538124159250921e-06,
"loss": 0.5397,
"step": 11856
},
{
"epoch": 0.75,
"grad_norm": 0.9036690592765808,
"learning_rate": 1.5373839390370098e-06,
"loss": 0.6106,
"step": 11857
},
{
"epoch": 0.75,
"grad_norm": 0.9009885191917419,
"learning_rate": 1.5366438646218146e-06,
"loss": 0.5968,
"step": 11858
},
{
"epoch": 0.75,
"grad_norm": 0.8845691084861755,
"learning_rate": 1.5359039360364975e-06,
"loss": 0.5573,
"step": 11859
},
{
"epoch": 0.75,
"grad_norm": 0.8430015444755554,
"learning_rate": 1.5351641533122153e-06,
"loss": 0.5701,
"step": 11860
},
{
"epoch": 0.75,
"grad_norm": 0.8827622532844543,
"learning_rate": 1.5344245164801174e-06,
"loss": 0.5712,
"step": 11861
},
{
"epoch": 0.75,
"grad_norm": 0.8615781664848328,
"learning_rate": 1.533685025571347e-06,
"loss": 0.5173,
"step": 11862
},
{
"epoch": 0.75,
"grad_norm": 0.8814289569854736,
"learning_rate": 1.5329456806170418e-06,
"loss": 0.6011,
"step": 11863
},
{
"epoch": 0.75,
"grad_norm": 0.9278409481048584,
"learning_rate": 1.5322064816483328e-06,
"loss": 0.6135,
"step": 11864
},
{
"epoch": 0.75,
"grad_norm": 0.875895082950592,
"learning_rate": 1.5314674286963471e-06,
"loss": 0.6038,
"step": 11865
},
{
"epoch": 0.75,
"grad_norm": 0.9008811712265015,
"learning_rate": 1.5307285217922003e-06,
"loss": 0.6005,
"step": 11866
},
{
"epoch": 0.75,
"grad_norm": 0.8702130913734436,
"learning_rate": 1.529989760967005e-06,
"loss": 0.5811,
"step": 11867
},
{
"epoch": 0.75,
"grad_norm": 0.8915956616401672,
"learning_rate": 1.5292511462518728e-06,
"loss": 0.5713,
"step": 11868
},
{
"epoch": 0.75,
"grad_norm": 0.8408598899841309,
"learning_rate": 1.528512677677899e-06,
"loss": 0.5299,
"step": 11869
},
{
"epoch": 0.75,
"grad_norm": 0.8335807919502258,
"learning_rate": 1.5277743552761809e-06,
"loss": 0.5533,
"step": 11870
},
{
"epoch": 0.75,
"grad_norm": 0.8974030613899231,
"learning_rate": 1.5270361790778065e-06,
"loss": 0.5777,
"step": 11871
},
{
"epoch": 0.75,
"grad_norm": 0.8503268957138062,
"learning_rate": 1.526298149113854e-06,
"loss": 0.5747,
"step": 11872
},
{
"epoch": 0.75,
"grad_norm": 0.9573015570640564,
"learning_rate": 1.5255602654154055e-06,
"loss": 0.5811,
"step": 11873
},
{
"epoch": 0.75,
"grad_norm": 0.9180850386619568,
"learning_rate": 1.5248225280135258e-06,
"loss": 0.5726,
"step": 11874
},
{
"epoch": 0.75,
"grad_norm": 0.8586685657501221,
"learning_rate": 1.5240849369392807e-06,
"loss": 0.5612,
"step": 11875
},
{
"epoch": 0.75,
"grad_norm": 0.9390682578086853,
"learning_rate": 1.5233474922237268e-06,
"loss": 0.5809,
"step": 11876
},
{
"epoch": 0.75,
"grad_norm": 0.8708896636962891,
"learning_rate": 1.5226101938979153e-06,
"loss": 0.5575,
"step": 11877
},
{
"epoch": 0.75,
"grad_norm": 0.89445960521698,
"learning_rate": 1.5218730419928917e-06,
"loss": 0.5099,
"step": 11878
},
{
"epoch": 0.75,
"grad_norm": 0.884432315826416,
"learning_rate": 1.5211360365396972e-06,
"loss": 0.6021,
"step": 11879
},
{
"epoch": 0.75,
"grad_norm": 0.9297842979431152,
"learning_rate": 1.5203991775693577e-06,
"loss": 0.6123,
"step": 11880
},
{
"epoch": 0.75,
"grad_norm": 0.9308014512062073,
"learning_rate": 1.5196624651129084e-06,
"loss": 0.5901,
"step": 11881
},
{
"epoch": 0.75,
"grad_norm": 0.9186192154884338,
"learning_rate": 1.5189258992013635e-06,
"loss": 0.5711,
"step": 11882
},
{
"epoch": 0.75,
"grad_norm": 0.9167851209640503,
"learning_rate": 1.5181894798657388e-06,
"loss": 0.5695,
"step": 11883
},
{
"epoch": 0.75,
"grad_norm": 0.878422200679779,
"learning_rate": 1.517453207137043e-06,
"loss": 0.5535,
"step": 11884
},
{
"epoch": 0.75,
"grad_norm": 0.8583741784095764,
"learning_rate": 1.5167170810462777e-06,
"loss": 0.5945,
"step": 11885
},
{
"epoch": 0.75,
"grad_norm": 0.8769426345825195,
"learning_rate": 1.5159811016244392e-06,
"loss": 0.5756,
"step": 11886
},
{
"epoch": 0.75,
"grad_norm": 0.9809277057647705,
"learning_rate": 1.5152452689025176e-06,
"loss": 0.5608,
"step": 11887
},
{
"epoch": 0.75,
"grad_norm": 0.8417267203330994,
"learning_rate": 1.5145095829114937e-06,
"loss": 0.56,
"step": 11888
},
{
"epoch": 0.75,
"grad_norm": 0.8778293132781982,
"learning_rate": 1.5137740436823462e-06,
"loss": 0.5685,
"step": 11889
},
{
"epoch": 0.75,
"grad_norm": 0.9264646768569946,
"learning_rate": 1.5130386512460454e-06,
"loss": 0.6045,
"step": 11890
},
{
"epoch": 0.75,
"grad_norm": 0.8687536120414734,
"learning_rate": 1.5123034056335572e-06,
"loss": 0.572,
"step": 11891
},
{
"epoch": 0.75,
"grad_norm": 0.8998939394950867,
"learning_rate": 1.5115683068758419e-06,
"loss": 0.5886,
"step": 11892
},
{
"epoch": 0.75,
"grad_norm": 0.9059341549873352,
"learning_rate": 1.5108333550038461e-06,
"loss": 0.581,
"step": 11893
},
{
"epoch": 0.75,
"grad_norm": 0.9004920721054077,
"learning_rate": 1.510098550048521e-06,
"loss": 0.5742,
"step": 11894
},
{
"epoch": 0.75,
"grad_norm": 0.8428323268890381,
"learning_rate": 1.5093638920408077e-06,
"loss": 0.5403,
"step": 11895
},
{
"epoch": 0.75,
"grad_norm": 0.8726648688316345,
"learning_rate": 1.508629381011636e-06,
"loss": 0.5946,
"step": 11896
},
{
"epoch": 0.75,
"grad_norm": 0.8611435294151306,
"learning_rate": 1.507895016991936e-06,
"loss": 0.5453,
"step": 11897
},
{
"epoch": 0.75,
"grad_norm": 0.9121397137641907,
"learning_rate": 1.507160800012628e-06,
"loss": 0.5912,
"step": 11898
},
{
"epoch": 0.75,
"grad_norm": 0.979377031326294,
"learning_rate": 1.5064267301046281e-06,
"loss": 0.5991,
"step": 11899
},
{
"epoch": 0.75,
"grad_norm": 0.9422827363014221,
"learning_rate": 1.5056928072988475e-06,
"loss": 0.5693,
"step": 11900
},
{
"epoch": 0.75,
"grad_norm": 0.9085085391998291,
"learning_rate": 1.504959031626183e-06,
"loss": 0.6133,
"step": 11901
},
{
"epoch": 0.75,
"grad_norm": 0.9609709978103638,
"learning_rate": 1.5042254031175373e-06,
"loss": 0.6043,
"step": 11902
},
{
"epoch": 0.75,
"grad_norm": 0.8676922917366028,
"learning_rate": 1.5034919218038007e-06,
"loss": 0.5184,
"step": 11903
},
{
"epoch": 0.75,
"grad_norm": 0.8704044818878174,
"learning_rate": 1.502758587715854e-06,
"loss": 0.5492,
"step": 11904
},
{
"epoch": 0.75,
"grad_norm": 0.9572499990463257,
"learning_rate": 1.5020254008845775e-06,
"loss": 0.6057,
"step": 11905
},
{
"epoch": 0.75,
"grad_norm": 0.895881712436676,
"learning_rate": 1.501292361340842e-06,
"loss": 0.5942,
"step": 11906
},
{
"epoch": 0.75,
"grad_norm": 0.8465459942817688,
"learning_rate": 1.500559469115515e-06,
"loss": 0.5872,
"step": 11907
},
{
"epoch": 0.75,
"grad_norm": 0.9170262217521667,
"learning_rate": 1.499826724239456e-06,
"loss": 0.5747,
"step": 11908
},
{
"epoch": 0.75,
"grad_norm": 0.8896523118019104,
"learning_rate": 1.499094126743516e-06,
"loss": 0.624,
"step": 11909
},
{
"epoch": 0.75,
"grad_norm": 0.8543857336044312,
"learning_rate": 1.4983616766585423e-06,
"loss": 0.5329,
"step": 11910
},
{
"epoch": 0.75,
"grad_norm": 0.9327712655067444,
"learning_rate": 1.4976293740153803e-06,
"loss": 0.6734,
"step": 11911
},
{
"epoch": 0.75,
"grad_norm": 0.9487749338150024,
"learning_rate": 1.4968972188448593e-06,
"loss": 0.551,
"step": 11912
},
{
"epoch": 0.75,
"grad_norm": 0.8792836666107178,
"learning_rate": 1.4961652111778103e-06,
"loss": 0.5467,
"step": 11913
},
{
"epoch": 0.75,
"grad_norm": 0.873479962348938,
"learning_rate": 1.4954333510450552e-06,
"loss": 0.5726,
"step": 11914
},
{
"epoch": 0.75,
"grad_norm": 0.8478526473045349,
"learning_rate": 1.4947016384774105e-06,
"loss": 0.5231,
"step": 11915
},
{
"epoch": 0.75,
"grad_norm": 0.8277620673179626,
"learning_rate": 1.4939700735056873e-06,
"loss": 0.5512,
"step": 11916
},
{
"epoch": 0.76,
"grad_norm": 0.9055673480033875,
"learning_rate": 1.493238656160686e-06,
"loss": 0.6138,
"step": 11917
},
{
"epoch": 0.76,
"grad_norm": 0.8501646518707275,
"learning_rate": 1.492507386473206e-06,
"loss": 0.5596,
"step": 11918
},
{
"epoch": 0.76,
"grad_norm": 0.9250453114509583,
"learning_rate": 1.4917762644740381e-06,
"loss": 0.5815,
"step": 11919
},
{
"epoch": 0.76,
"grad_norm": 0.8276757597923279,
"learning_rate": 1.4910452901939671e-06,
"loss": 0.525,
"step": 11920
},
{
"epoch": 0.76,
"grad_norm": 0.8669492602348328,
"learning_rate": 1.4903144636637723e-06,
"loss": 0.5612,
"step": 11921
},
{
"epoch": 0.76,
"grad_norm": 0.9456034302711487,
"learning_rate": 1.489583784914228e-06,
"loss": 0.5591,
"step": 11922
},
{
"epoch": 0.76,
"grad_norm": 0.971352756023407,
"learning_rate": 1.4888532539760958e-06,
"loss": 0.6341,
"step": 11923
},
{
"epoch": 0.76,
"grad_norm": 0.8353976607322693,
"learning_rate": 1.4881228708801409e-06,
"loss": 0.5194,
"step": 11924
},
{
"epoch": 0.76,
"grad_norm": 0.8877169489860535,
"learning_rate": 1.4873926356571144e-06,
"loss": 0.5992,
"step": 11925
},
{
"epoch": 0.76,
"grad_norm": 0.9067363142967224,
"learning_rate": 1.486662548337764e-06,
"loss": 0.6173,
"step": 11926
},
{
"epoch": 0.76,
"grad_norm": 0.891920268535614,
"learning_rate": 1.485932608952832e-06,
"loss": 0.5366,
"step": 11927
},
{
"epoch": 0.76,
"grad_norm": 0.8979024291038513,
"learning_rate": 1.485202817533053e-06,
"loss": 0.5402,
"step": 11928
},
{
"epoch": 0.76,
"grad_norm": 0.9207996129989624,
"learning_rate": 1.4844731741091561e-06,
"loss": 0.5968,
"step": 11929
},
{
"epoch": 0.76,
"grad_norm": 0.9184231162071228,
"learning_rate": 1.4837436787118665e-06,
"loss": 0.5759,
"step": 11930
},
{
"epoch": 0.76,
"grad_norm": 0.851150393486023,
"learning_rate": 1.4830143313718943e-06,
"loss": 0.5953,
"step": 11931
},
{
"epoch": 0.76,
"grad_norm": 0.8827084898948669,
"learning_rate": 1.482285132119956e-06,
"loss": 0.5872,
"step": 11932
},
{
"epoch": 0.76,
"grad_norm": 0.840436577796936,
"learning_rate": 1.4815560809867551e-06,
"loss": 0.5918,
"step": 11933
},
{
"epoch": 0.76,
"grad_norm": 0.903814971446991,
"learning_rate": 1.4808271780029864e-06,
"loss": 0.5741,
"step": 11934
},
{
"epoch": 0.76,
"grad_norm": 0.8789491057395935,
"learning_rate": 1.4800984231993432e-06,
"loss": 0.5188,
"step": 11935
},
{
"epoch": 0.76,
"grad_norm": 0.9306246638298035,
"learning_rate": 1.47936981660651e-06,
"loss": 0.5841,
"step": 11936
},
{
"epoch": 0.76,
"grad_norm": 0.9300113916397095,
"learning_rate": 1.4786413582551668e-06,
"loss": 0.6367,
"step": 11937
},
{
"epoch": 0.76,
"grad_norm": 0.8994358777999878,
"learning_rate": 1.4779130481759874e-06,
"loss": 0.5961,
"step": 11938
},
{
"epoch": 0.76,
"grad_norm": 0.872204065322876,
"learning_rate": 1.4771848863996353e-06,
"loss": 0.6409,
"step": 11939
},
{
"epoch": 0.76,
"grad_norm": 0.897193431854248,
"learning_rate": 1.4764568729567714e-06,
"loss": 0.5399,
"step": 11940
},
{
"epoch": 0.76,
"grad_norm": 0.8492984771728516,
"learning_rate": 1.4757290078780545e-06,
"loss": 0.5339,
"step": 11941
},
{
"epoch": 0.76,
"grad_norm": 0.942596971988678,
"learning_rate": 1.475001291194127e-06,
"loss": 0.6122,
"step": 11942
},
{
"epoch": 0.76,
"grad_norm": 0.9129643440246582,
"learning_rate": 1.4742737229356324e-06,
"loss": 0.6431,
"step": 11943
},
{
"epoch": 0.76,
"grad_norm": 0.9358230829238892,
"learning_rate": 1.473546303133207e-06,
"loss": 0.6236,
"step": 11944
},
{
"epoch": 0.76,
"grad_norm": 0.8448783159255981,
"learning_rate": 1.4728190318174785e-06,
"loss": 0.5691,
"step": 11945
},
{
"epoch": 0.76,
"grad_norm": 0.8805672526359558,
"learning_rate": 1.4720919090190723e-06,
"loss": 0.5347,
"step": 11946
},
{
"epoch": 0.76,
"grad_norm": 0.9884516596794128,
"learning_rate": 1.471364934768601e-06,
"loss": 0.5915,
"step": 11947
},
{
"epoch": 0.76,
"grad_norm": 0.9252734780311584,
"learning_rate": 1.470638109096676e-06,
"loss": 0.5798,
"step": 11948
},
{
"epoch": 0.76,
"grad_norm": 0.8685827851295471,
"learning_rate": 1.469911432033906e-06,
"loss": 0.5078,
"step": 11949
},
{
"epoch": 0.76,
"grad_norm": 0.8583547472953796,
"learning_rate": 1.469184903610883e-06,
"loss": 0.5571,
"step": 11950
},
{
"epoch": 0.76,
"grad_norm": 0.9142690300941467,
"learning_rate": 1.468458523858201e-06,
"loss": 0.5576,
"step": 11951
},
{
"epoch": 0.76,
"grad_norm": 0.8868157267570496,
"learning_rate": 1.467732292806447e-06,
"loss": 0.6154,
"step": 11952
},
{
"epoch": 0.76,
"grad_norm": 0.9219274520874023,
"learning_rate": 1.4670062104861948e-06,
"loss": 0.5348,
"step": 11953
},
{
"epoch": 0.76,
"grad_norm": 0.8708443641662598,
"learning_rate": 1.4662802769280244e-06,
"loss": 0.5806,
"step": 11954
},
{
"epoch": 0.76,
"grad_norm": 0.884825587272644,
"learning_rate": 1.4655544921624964e-06,
"loss": 0.5741,
"step": 11955
},
{
"epoch": 0.76,
"grad_norm": 0.9130429625511169,
"learning_rate": 1.464828856220174e-06,
"loss": 0.5545,
"step": 11956
},
{
"epoch": 0.76,
"grad_norm": 0.9295893907546997,
"learning_rate": 1.4641033691316104e-06,
"loss": 0.6212,
"step": 11957
},
{
"epoch": 0.76,
"grad_norm": 0.8640606999397278,
"learning_rate": 1.4633780309273532e-06,
"loss": 0.5754,
"step": 11958
},
{
"epoch": 0.76,
"grad_norm": 0.8814523220062256,
"learning_rate": 1.4626528416379438e-06,
"loss": 0.5948,
"step": 11959
},
{
"epoch": 0.76,
"grad_norm": 0.91228848695755,
"learning_rate": 1.4619278012939197e-06,
"loss": 0.5684,
"step": 11960
},
{
"epoch": 0.76,
"grad_norm": 0.8493878245353699,
"learning_rate": 1.4612029099258046e-06,
"loss": 0.5311,
"step": 11961
},
{
"epoch": 0.76,
"grad_norm": 0.8996723890304565,
"learning_rate": 1.4604781675641273e-06,
"loss": 0.5628,
"step": 11962
},
{
"epoch": 0.76,
"grad_norm": 0.9304405450820923,
"learning_rate": 1.4597535742393998e-06,
"loss": 0.5539,
"step": 11963
},
{
"epoch": 0.76,
"grad_norm": 0.8904679417610168,
"learning_rate": 1.459029129982134e-06,
"loss": 0.5766,
"step": 11964
},
{
"epoch": 0.76,
"grad_norm": 0.9363497495651245,
"learning_rate": 1.4583048348228345e-06,
"loss": 0.5826,
"step": 11965
},
{
"epoch": 0.76,
"grad_norm": 0.8508997559547424,
"learning_rate": 1.4575806887919951e-06,
"loss": 0.5896,
"step": 11966
},
{
"epoch": 0.76,
"grad_norm": 0.8754972219467163,
"learning_rate": 1.456856691920111e-06,
"loss": 0.579,
"step": 11967
},
{
"epoch": 0.76,
"grad_norm": 0.9017912745475769,
"learning_rate": 1.4561328442376678e-06,
"loss": 0.5713,
"step": 11968
},
{
"epoch": 0.76,
"grad_norm": 0.8693665862083435,
"learning_rate": 1.45540914577514e-06,
"loss": 0.5654,
"step": 11969
},
{
"epoch": 0.76,
"grad_norm": 0.9070661664009094,
"learning_rate": 1.454685596563003e-06,
"loss": 0.5468,
"step": 11970
},
{
"epoch": 0.76,
"grad_norm": 0.8635410666465759,
"learning_rate": 1.4539621966317219e-06,
"loss": 0.5684,
"step": 11971
},
{
"epoch": 0.76,
"grad_norm": 0.8574047088623047,
"learning_rate": 1.4532389460117574e-06,
"loss": 0.5417,
"step": 11972
},
{
"epoch": 0.76,
"grad_norm": 0.9325186610221863,
"learning_rate": 1.4525158447335635e-06,
"loss": 0.5479,
"step": 11973
},
{
"epoch": 0.76,
"grad_norm": 0.8873754143714905,
"learning_rate": 1.4517928928275843e-06,
"loss": 0.5812,
"step": 11974
},
{
"epoch": 0.76,
"grad_norm": 0.8907727599143982,
"learning_rate": 1.4510700903242642e-06,
"loss": 0.5342,
"step": 11975
},
{
"epoch": 0.76,
"grad_norm": 0.8830631375312805,
"learning_rate": 1.4503474372540382e-06,
"loss": 0.6237,
"step": 11976
},
{
"epoch": 0.76,
"grad_norm": 0.9112756252288818,
"learning_rate": 1.4496249336473318e-06,
"loss": 0.5756,
"step": 11977
},
{
"epoch": 0.76,
"grad_norm": 0.9188751578330994,
"learning_rate": 1.4489025795345686e-06,
"loss": 0.5752,
"step": 11978
},
{
"epoch": 0.76,
"grad_norm": 0.915205717086792,
"learning_rate": 1.4481803749461643e-06,
"loss": 0.5848,
"step": 11979
},
{
"epoch": 0.76,
"grad_norm": 0.8783239722251892,
"learning_rate": 1.4474583199125285e-06,
"loss": 0.5691,
"step": 11980
},
{
"epoch": 0.76,
"grad_norm": 0.8469722270965576,
"learning_rate": 1.446736414464066e-06,
"loss": 0.5367,
"step": 11981
},
{
"epoch": 0.76,
"grad_norm": 0.855970025062561,
"learning_rate": 1.4460146586311713e-06,
"loss": 0.5862,
"step": 11982
},
{
"epoch": 0.76,
"grad_norm": 0.9022277593612671,
"learning_rate": 1.4452930524442338e-06,
"loss": 0.5992,
"step": 11983
},
{
"epoch": 0.76,
"grad_norm": 0.856157124042511,
"learning_rate": 1.4445715959336432e-06,
"loss": 0.5505,
"step": 11984
},
{
"epoch": 0.76,
"grad_norm": 0.8723897933959961,
"learning_rate": 1.4438502891297723e-06,
"loss": 0.5493,
"step": 11985
},
{
"epoch": 0.76,
"grad_norm": 0.8988958597183228,
"learning_rate": 1.4431291320629953e-06,
"loss": 0.5577,
"step": 11986
},
{
"epoch": 0.76,
"grad_norm": 0.9534813165664673,
"learning_rate": 1.4424081247636768e-06,
"loss": 0.575,
"step": 11987
},
{
"epoch": 0.76,
"grad_norm": 0.886587917804718,
"learning_rate": 1.4416872672621762e-06,
"loss": 0.6182,
"step": 11988
},
{
"epoch": 0.76,
"grad_norm": 0.9059985280036926,
"learning_rate": 1.440966559588846e-06,
"loss": 0.5644,
"step": 11989
},
{
"epoch": 0.76,
"grad_norm": 0.8605340719223022,
"learning_rate": 1.4402460017740355e-06,
"loss": 0.5692,
"step": 11990
},
{
"epoch": 0.76,
"grad_norm": 0.8864085674285889,
"learning_rate": 1.4395255938480785e-06,
"loss": 0.5363,
"step": 11991
},
{
"epoch": 0.76,
"grad_norm": 0.9561933875083923,
"learning_rate": 1.4388053358413162e-06,
"loss": 0.5968,
"step": 11992
},
{
"epoch": 0.76,
"grad_norm": 0.8334751129150391,
"learning_rate": 1.4380852277840712e-06,
"loss": 0.5657,
"step": 11993
},
{
"epoch": 0.76,
"grad_norm": 0.878257155418396,
"learning_rate": 1.437365269706666e-06,
"loss": 0.5811,
"step": 11994
},
{
"epoch": 0.76,
"grad_norm": 0.8865748047828674,
"learning_rate": 1.436645461639416e-06,
"loss": 0.5639,
"step": 11995
},
{
"epoch": 0.76,
"grad_norm": 0.9328632354736328,
"learning_rate": 1.4359258036126295e-06,
"loss": 0.5744,
"step": 11996
},
{
"epoch": 0.76,
"grad_norm": 0.8493953943252563,
"learning_rate": 1.4352062956566088e-06,
"loss": 0.5575,
"step": 11997
},
{
"epoch": 0.76,
"grad_norm": 0.8793647289276123,
"learning_rate": 1.4344869378016518e-06,
"loss": 0.5454,
"step": 11998
},
{
"epoch": 0.76,
"grad_norm": 0.8968666195869446,
"learning_rate": 1.4337677300780445e-06,
"loss": 0.6076,
"step": 11999
},
{
"epoch": 0.76,
"grad_norm": 0.8650113940238953,
"learning_rate": 1.433048672516072e-06,
"loss": 0.601,
"step": 12000
},
{
"epoch": 0.76,
"grad_norm": 0.8847838044166565,
"learning_rate": 1.4323297651460117e-06,
"loss": 0.5885,
"step": 12001
},
{
"epoch": 0.76,
"grad_norm": 0.9786666035652161,
"learning_rate": 1.4316110079981339e-06,
"loss": 0.5909,
"step": 12002
},
{
"epoch": 0.76,
"grad_norm": 0.8795079588890076,
"learning_rate": 1.4308924011027042e-06,
"loss": 0.593,
"step": 12003
},
{
"epoch": 0.76,
"grad_norm": 0.9203742146492004,
"learning_rate": 1.430173944489977e-06,
"loss": 0.576,
"step": 12004
},
{
"epoch": 0.76,
"grad_norm": 0.8110754489898682,
"learning_rate": 1.4294556381902074e-06,
"loss": 0.5006,
"step": 12005
},
{
"epoch": 0.76,
"grad_norm": 0.90155029296875,
"learning_rate": 1.428737482233642e-06,
"loss": 0.5475,
"step": 12006
},
{
"epoch": 0.76,
"grad_norm": 0.8908960819244385,
"learning_rate": 1.4280194766505156e-06,
"loss": 0.5745,
"step": 12007
},
{
"epoch": 0.76,
"grad_norm": 0.9284471869468689,
"learning_rate": 1.427301621471064e-06,
"loss": 0.6061,
"step": 12008
},
{
"epoch": 0.76,
"grad_norm": 0.9098774790763855,
"learning_rate": 1.4265839167255114e-06,
"loss": 0.6325,
"step": 12009
},
{
"epoch": 0.76,
"grad_norm": 0.8448832035064697,
"learning_rate": 1.42586636244408e-06,
"loss": 0.5629,
"step": 12010
},
{
"epoch": 0.76,
"grad_norm": 0.898383378982544,
"learning_rate": 1.4251489586569834e-06,
"loss": 0.6486,
"step": 12011
},
{
"epoch": 0.76,
"grad_norm": 0.915291428565979,
"learning_rate": 1.4244317053944268e-06,
"loss": 0.5692,
"step": 12012
},
{
"epoch": 0.76,
"grad_norm": 0.8258968591690063,
"learning_rate": 1.423714602686611e-06,
"loss": 0.5649,
"step": 12013
},
{
"epoch": 0.76,
"grad_norm": 0.8475250601768494,
"learning_rate": 1.4229976505637361e-06,
"loss": 0.5672,
"step": 12014
},
{
"epoch": 0.76,
"grad_norm": 0.8996663689613342,
"learning_rate": 1.4222808490559842e-06,
"loss": 0.6316,
"step": 12015
},
{
"epoch": 0.76,
"grad_norm": 0.8408166766166687,
"learning_rate": 1.4215641981935403e-06,
"loss": 0.4919,
"step": 12016
},
{
"epoch": 0.76,
"grad_norm": 0.9476755261421204,
"learning_rate": 1.4208476980065794e-06,
"loss": 0.5853,
"step": 12017
},
{
"epoch": 0.76,
"grad_norm": 0.8874031901359558,
"learning_rate": 1.420131348525271e-06,
"loss": 0.5749,
"step": 12018
},
{
"epoch": 0.76,
"grad_norm": 0.8719558119773865,
"learning_rate": 1.4194151497797793e-06,
"loss": 0.6041,
"step": 12019
},
{
"epoch": 0.76,
"grad_norm": 0.9502779841423035,
"learning_rate": 1.4186991018002582e-06,
"loss": 0.5715,
"step": 12020
},
{
"epoch": 0.76,
"grad_norm": 0.8630761504173279,
"learning_rate": 1.4179832046168584e-06,
"loss": 0.6021,
"step": 12021
},
{
"epoch": 0.76,
"grad_norm": 0.922612190246582,
"learning_rate": 1.417267458259728e-06,
"loss": 0.5775,
"step": 12022
},
{
"epoch": 0.76,
"grad_norm": 0.8538753390312195,
"learning_rate": 1.4165518627589991e-06,
"loss": 0.5034,
"step": 12023
},
{
"epoch": 0.76,
"grad_norm": 0.9272584915161133,
"learning_rate": 1.4158364181448065e-06,
"loss": 0.5819,
"step": 12024
},
{
"epoch": 0.76,
"grad_norm": 0.9062113761901855,
"learning_rate": 1.4151211244472734e-06,
"loss": 0.6303,
"step": 12025
},
{
"epoch": 0.76,
"grad_norm": 1.0064603090286255,
"learning_rate": 1.414405981696519e-06,
"loss": 0.5895,
"step": 12026
},
{
"epoch": 0.76,
"grad_norm": 0.8894251585006714,
"learning_rate": 1.4136909899226564e-06,
"loss": 0.6,
"step": 12027
},
{
"epoch": 0.76,
"grad_norm": 0.8731244206428528,
"learning_rate": 1.412976149155789e-06,
"loss": 0.5272,
"step": 12028
},
{
"epoch": 0.76,
"grad_norm": 0.9041507840156555,
"learning_rate": 1.412261459426018e-06,
"loss": 0.5635,
"step": 12029
},
{
"epoch": 0.76,
"grad_norm": 0.9179893136024475,
"learning_rate": 1.4115469207634358e-06,
"loss": 0.6162,
"step": 12030
},
{
"epoch": 0.76,
"grad_norm": 0.9017035365104675,
"learning_rate": 1.4108325331981298e-06,
"loss": 0.5674,
"step": 12031
},
{
"epoch": 0.76,
"grad_norm": 0.8612952828407288,
"learning_rate": 1.4101182967601796e-06,
"loss": 0.6167,
"step": 12032
},
{
"epoch": 0.76,
"grad_norm": 0.8670690059661865,
"learning_rate": 1.4094042114796613e-06,
"loss": 0.5648,
"step": 12033
},
{
"epoch": 0.76,
"grad_norm": 0.900267481803894,
"learning_rate": 1.4086902773866379e-06,
"loss": 0.5819,
"step": 12034
},
{
"epoch": 0.76,
"grad_norm": 0.8597514629364014,
"learning_rate": 1.4079764945111767e-06,
"loss": 0.5914,
"step": 12035
},
{
"epoch": 0.76,
"grad_norm": 0.8889679908752441,
"learning_rate": 1.407262862883328e-06,
"loss": 0.5411,
"step": 12036
},
{
"epoch": 0.76,
"grad_norm": 0.8739466071128845,
"learning_rate": 1.4065493825331416e-06,
"loss": 0.5116,
"step": 12037
},
{
"epoch": 0.76,
"grad_norm": 0.9049973487854004,
"learning_rate": 1.4058360534906607e-06,
"loss": 0.6136,
"step": 12038
},
{
"epoch": 0.76,
"grad_norm": 0.9659464359283447,
"learning_rate": 1.4051228757859197e-06,
"loss": 0.6078,
"step": 12039
},
{
"epoch": 0.76,
"grad_norm": 0.9126960635185242,
"learning_rate": 1.4044098494489494e-06,
"loss": 0.5604,
"step": 12040
},
{
"epoch": 0.76,
"grad_norm": 0.8697749972343445,
"learning_rate": 1.4036969745097735e-06,
"loss": 0.558,
"step": 12041
},
{
"epoch": 0.76,
"grad_norm": 0.8654941320419312,
"learning_rate": 1.4029842509984043e-06,
"loss": 0.5581,
"step": 12042
},
{
"epoch": 0.76,
"grad_norm": 0.8910323977470398,
"learning_rate": 1.4022716789448581e-06,
"loss": 0.5612,
"step": 12043
},
{
"epoch": 0.76,
"grad_norm": 0.8871368765830994,
"learning_rate": 1.4015592583791343e-06,
"loss": 0.5548,
"step": 12044
},
{
"epoch": 0.76,
"grad_norm": 0.8820889592170715,
"learning_rate": 1.4008469893312321e-06,
"loss": 0.5956,
"step": 12045
},
{
"epoch": 0.76,
"grad_norm": 0.8938196301460266,
"learning_rate": 1.4001348718311446e-06,
"loss": 0.5659,
"step": 12046
},
{
"epoch": 0.76,
"grad_norm": 0.864248514175415,
"learning_rate": 1.399422905908851e-06,
"loss": 0.5602,
"step": 12047
},
{
"epoch": 0.76,
"grad_norm": 0.9130455851554871,
"learning_rate": 1.3987110915943352e-06,
"loss": 0.5987,
"step": 12048
},
{
"epoch": 0.76,
"grad_norm": 0.8632736802101135,
"learning_rate": 1.397999428917569e-06,
"loss": 0.5656,
"step": 12049
},
{
"epoch": 0.76,
"grad_norm": 0.9414397478103638,
"learning_rate": 1.3972879179085147e-06,
"loss": 0.5893,
"step": 12050
},
{
"epoch": 0.76,
"grad_norm": 0.9404668211936951,
"learning_rate": 1.396576558597133e-06,
"loss": 0.608,
"step": 12051
},
{
"epoch": 0.76,
"grad_norm": 0.9048896431922913,
"learning_rate": 1.3958653510133774e-06,
"loss": 0.5404,
"step": 12052
},
{
"epoch": 0.76,
"grad_norm": 0.9009268879890442,
"learning_rate": 1.3951542951871938e-06,
"loss": 0.5708,
"step": 12053
},
{
"epoch": 0.76,
"grad_norm": 0.8709206581115723,
"learning_rate": 1.3944433911485229e-06,
"loss": 0.5458,
"step": 12054
},
{
"epoch": 0.76,
"grad_norm": 0.8912920951843262,
"learning_rate": 1.3937326389272977e-06,
"loss": 0.6084,
"step": 12055
},
{
"epoch": 0.76,
"grad_norm": 0.8818450570106506,
"learning_rate": 1.3930220385534453e-06,
"loss": 0.6093,
"step": 12056
},
{
"epoch": 0.76,
"grad_norm": 0.8648407459259033,
"learning_rate": 1.3923115900568896e-06,
"loss": 0.6086,
"step": 12057
},
{
"epoch": 0.76,
"grad_norm": 0.9007489681243896,
"learning_rate": 1.3916012934675405e-06,
"loss": 0.5638,
"step": 12058
},
{
"epoch": 0.76,
"grad_norm": 0.8429774641990662,
"learning_rate": 1.3908911488153081e-06,
"loss": 0.5916,
"step": 12059
},
{
"epoch": 0.76,
"grad_norm": 0.8665831685066223,
"learning_rate": 1.3901811561300944e-06,
"loss": 0.5793,
"step": 12060
},
{
"epoch": 0.76,
"grad_norm": 0.8880481719970703,
"learning_rate": 1.3894713154417944e-06,
"loss": 0.567,
"step": 12061
},
{
"epoch": 0.76,
"grad_norm": 0.8977078199386597,
"learning_rate": 1.3887616267802972e-06,
"loss": 0.5831,
"step": 12062
},
{
"epoch": 0.76,
"grad_norm": 0.8818337917327881,
"learning_rate": 1.3880520901754874e-06,
"loss": 0.584,
"step": 12063
},
{
"epoch": 0.76,
"grad_norm": 0.9489629864692688,
"learning_rate": 1.3873427056572354e-06,
"loss": 0.6143,
"step": 12064
},
{
"epoch": 0.76,
"grad_norm": 0.8898563981056213,
"learning_rate": 1.386633473255418e-06,
"loss": 0.5799,
"step": 12065
},
{
"epoch": 0.76,
"grad_norm": 0.9059821963310242,
"learning_rate": 1.3859243929998933e-06,
"loss": 0.6092,
"step": 12066
},
{
"epoch": 0.76,
"grad_norm": 0.8613783121109009,
"learning_rate": 1.3852154649205201e-06,
"loss": 0.5234,
"step": 12067
},
{
"epoch": 0.76,
"grad_norm": 0.9153664112091064,
"learning_rate": 1.3845066890471487e-06,
"loss": 0.5966,
"step": 12068
},
{
"epoch": 0.76,
"grad_norm": 0.8898327350616455,
"learning_rate": 1.3837980654096229e-06,
"loss": 0.6014,
"step": 12069
},
{
"epoch": 0.76,
"grad_norm": 0.9291636347770691,
"learning_rate": 1.383089594037781e-06,
"loss": 0.5429,
"step": 12070
},
{
"epoch": 0.76,
"grad_norm": 0.9276854991912842,
"learning_rate": 1.3823812749614556e-06,
"loss": 0.5632,
"step": 12071
},
{
"epoch": 0.76,
"grad_norm": 0.9263901114463806,
"learning_rate": 1.3816731082104668e-06,
"loss": 0.5816,
"step": 12072
},
{
"epoch": 0.76,
"grad_norm": 0.9651497602462769,
"learning_rate": 1.3809650938146391e-06,
"loss": 0.6368,
"step": 12073
},
{
"epoch": 0.76,
"grad_norm": 0.8885228037834167,
"learning_rate": 1.3802572318037804e-06,
"loss": 0.5443,
"step": 12074
},
{
"epoch": 0.77,
"grad_norm": 0.8886858820915222,
"learning_rate": 1.379549522207697e-06,
"loss": 0.5806,
"step": 12075
},
{
"epoch": 0.77,
"grad_norm": 0.9202280640602112,
"learning_rate": 1.3788419650561908e-06,
"loss": 0.6229,
"step": 12076
},
{
"epoch": 0.77,
"grad_norm": 0.875375509262085,
"learning_rate": 1.3781345603790485e-06,
"loss": 0.4984,
"step": 12077
},
{
"epoch": 0.77,
"grad_norm": 0.8515941500663757,
"learning_rate": 1.3774273082060625e-06,
"loss": 0.5577,
"step": 12078
},
{
"epoch": 0.77,
"grad_norm": 0.9056531190872192,
"learning_rate": 1.3767202085670118e-06,
"loss": 0.6189,
"step": 12079
},
{
"epoch": 0.77,
"grad_norm": 0.877628743648529,
"learning_rate": 1.3760132614916672e-06,
"loss": 0.5966,
"step": 12080
},
{
"epoch": 0.77,
"grad_norm": 0.9349701404571533,
"learning_rate": 1.375306467009797e-06,
"loss": 0.64,
"step": 12081
},
{
"epoch": 0.77,
"grad_norm": 0.9644002914428711,
"learning_rate": 1.3745998251511622e-06,
"loss": 0.5834,
"step": 12082
},
{
"epoch": 0.77,
"grad_norm": 0.8802942037582397,
"learning_rate": 1.373893335945517e-06,
"loss": 0.5498,
"step": 12083
},
{
"epoch": 0.77,
"grad_norm": 0.936839759349823,
"learning_rate": 1.373186999422611e-06,
"loss": 0.5698,
"step": 12084
},
{
"epoch": 0.77,
"grad_norm": 0.8854506015777588,
"learning_rate": 1.3724808156121799e-06,
"loss": 0.5429,
"step": 12085
},
{
"epoch": 0.77,
"grad_norm": 0.882511556148529,
"learning_rate": 1.3717747845439645e-06,
"loss": 0.531,
"step": 12086
},
{
"epoch": 0.77,
"grad_norm": 0.8254367709159851,
"learning_rate": 1.371068906247693e-06,
"loss": 0.5357,
"step": 12087
},
{
"epoch": 0.77,
"grad_norm": 0.9685107469558716,
"learning_rate": 1.3703631807530831e-06,
"loss": 0.6293,
"step": 12088
},
{
"epoch": 0.77,
"grad_norm": 0.8970738649368286,
"learning_rate": 1.3696576080898538e-06,
"loss": 0.6471,
"step": 12089
},
{
"epoch": 0.77,
"grad_norm": 0.8906121253967285,
"learning_rate": 1.3689521882877137e-06,
"loss": 0.5589,
"step": 12090
},
{
"epoch": 0.77,
"grad_norm": 0.8889774084091187,
"learning_rate": 1.3682469213763655e-06,
"loss": 0.5734,
"step": 12091
},
{
"epoch": 0.77,
"grad_norm": 0.8506032228469849,
"learning_rate": 1.367541807385507e-06,
"loss": 0.5613,
"step": 12092
},
{
"epoch": 0.77,
"grad_norm": 0.9361366629600525,
"learning_rate": 1.3668368463448246e-06,
"loss": 0.5551,
"step": 12093
},
{
"epoch": 0.77,
"grad_norm": 0.867592990398407,
"learning_rate": 1.3661320382840026e-06,
"loss": 0.4914,
"step": 12094
},
{
"epoch": 0.77,
"grad_norm": 0.9990113973617554,
"learning_rate": 1.3654273832327219e-06,
"loss": 0.5793,
"step": 12095
},
{
"epoch": 0.77,
"grad_norm": 0.9370816946029663,
"learning_rate": 1.3647228812206493e-06,
"loss": 0.5923,
"step": 12096
},
{
"epoch": 0.77,
"grad_norm": 0.8764215111732483,
"learning_rate": 1.3640185322774495e-06,
"loss": 0.5852,
"step": 12097
},
{
"epoch": 0.77,
"grad_norm": 0.8534221053123474,
"learning_rate": 1.3633143364327812e-06,
"loss": 0.5271,
"step": 12098
},
{
"epoch": 0.77,
"grad_norm": 0.8506825566291809,
"learning_rate": 1.3626102937162943e-06,
"loss": 0.5034,
"step": 12099
},
{
"epoch": 0.77,
"grad_norm": 0.9237973690032959,
"learning_rate": 1.3619064041576368e-06,
"loss": 0.5503,
"step": 12100
},
{
"epoch": 0.77,
"grad_norm": 0.9016441106796265,
"learning_rate": 1.3612026677864426e-06,
"loss": 0.6057,
"step": 12101
},
{
"epoch": 0.77,
"grad_norm": 0.9441617727279663,
"learning_rate": 1.360499084632344e-06,
"loss": 0.6116,
"step": 12102
},
{
"epoch": 0.77,
"grad_norm": 0.8589941263198853,
"learning_rate": 1.3597956547249713e-06,
"loss": 0.5755,
"step": 12103
},
{
"epoch": 0.77,
"grad_norm": 0.8942433595657349,
"learning_rate": 1.3590923780939386e-06,
"loss": 0.575,
"step": 12104
},
{
"epoch": 0.77,
"grad_norm": 0.8741679787635803,
"learning_rate": 1.3583892547688598e-06,
"loss": 0.5754,
"step": 12105
},
{
"epoch": 0.77,
"grad_norm": 0.8796617984771729,
"learning_rate": 1.357686284779343e-06,
"loss": 0.5743,
"step": 12106
},
{
"epoch": 0.77,
"grad_norm": 0.8270777463912964,
"learning_rate": 1.3569834681549832e-06,
"loss": 0.5867,
"step": 12107
},
{
"epoch": 0.77,
"grad_norm": 0.8669138550758362,
"learning_rate": 1.3562808049253795e-06,
"loss": 0.575,
"step": 12108
},
{
"epoch": 0.77,
"grad_norm": 0.8774738311767578,
"learning_rate": 1.3555782951201134e-06,
"loss": 0.6112,
"step": 12109
},
{
"epoch": 0.77,
"grad_norm": 0.8828439712524414,
"learning_rate": 1.3548759387687683e-06,
"loss": 0.5896,
"step": 12110
},
{
"epoch": 0.77,
"grad_norm": 0.9196562170982361,
"learning_rate": 1.3541737359009161e-06,
"loss": 0.5742,
"step": 12111
},
{
"epoch": 0.77,
"grad_norm": 0.8660917282104492,
"learning_rate": 1.3534716865461256e-06,
"loss": 0.5293,
"step": 12112
},
{
"epoch": 0.77,
"grad_norm": 0.8525222539901733,
"learning_rate": 1.3527697907339565e-06,
"loss": 0.5479,
"step": 12113
},
{
"epoch": 0.77,
"grad_norm": 0.855554461479187,
"learning_rate": 1.3520680484939651e-06,
"loss": 0.5706,
"step": 12114
},
{
"epoch": 0.77,
"grad_norm": 1.0038961172103882,
"learning_rate": 1.3513664598556952e-06,
"loss": 0.5432,
"step": 12115
},
{
"epoch": 0.77,
"grad_norm": 0.8975916504859924,
"learning_rate": 1.3506650248486946e-06,
"loss": 0.5811,
"step": 12116
},
{
"epoch": 0.77,
"grad_norm": 1.0002868175506592,
"learning_rate": 1.3499637435024926e-06,
"loss": 0.5884,
"step": 12117
},
{
"epoch": 0.77,
"grad_norm": 0.9339752197265625,
"learning_rate": 1.34926261584662e-06,
"loss": 0.5654,
"step": 12118
},
{
"epoch": 0.77,
"grad_norm": 0.8323291540145874,
"learning_rate": 1.3485616419105985e-06,
"loss": 0.6116,
"step": 12119
},
{
"epoch": 0.77,
"grad_norm": 0.9814819097518921,
"learning_rate": 1.3478608217239435e-06,
"loss": 0.5862,
"step": 12120
},
{
"epoch": 0.77,
"grad_norm": 0.8952215313911438,
"learning_rate": 1.347160155316165e-06,
"loss": 0.5829,
"step": 12121
},
{
"epoch": 0.77,
"grad_norm": 0.9084662795066833,
"learning_rate": 1.3464596427167663e-06,
"loss": 0.574,
"step": 12122
},
{
"epoch": 0.77,
"grad_norm": 0.8473517298698425,
"learning_rate": 1.3457592839552409e-06,
"loss": 0.5339,
"step": 12123
},
{
"epoch": 0.77,
"grad_norm": 0.8757284879684448,
"learning_rate": 1.3450590790610795e-06,
"loss": 0.5858,
"step": 12124
},
{
"epoch": 0.77,
"grad_norm": 0.9462736248970032,
"learning_rate": 1.3443590280637664e-06,
"loss": 0.5508,
"step": 12125
},
{
"epoch": 0.77,
"grad_norm": 0.9609660506248474,
"learning_rate": 1.3436591309927772e-06,
"loss": 0.58,
"step": 12126
},
{
"epoch": 0.77,
"grad_norm": 0.8770208358764648,
"learning_rate": 1.3429593878775825e-06,
"loss": 0.5592,
"step": 12127
},
{
"epoch": 0.77,
"grad_norm": 0.9094352722167969,
"learning_rate": 1.342259798747646e-06,
"loss": 0.5542,
"step": 12128
},
{
"epoch": 0.77,
"grad_norm": 0.8885565996170044,
"learning_rate": 1.3415603636324248e-06,
"loss": 0.5852,
"step": 12129
},
{
"epoch": 0.77,
"grad_norm": 0.839444637298584,
"learning_rate": 1.3408610825613722e-06,
"loss": 0.5314,
"step": 12130
},
{
"epoch": 0.77,
"grad_norm": 0.9045486450195312,
"learning_rate": 1.340161955563928e-06,
"loss": 0.5268,
"step": 12131
},
{
"epoch": 0.77,
"grad_norm": 0.886461079120636,
"learning_rate": 1.339462982669531e-06,
"loss": 0.5484,
"step": 12132
},
{
"epoch": 0.77,
"grad_norm": 0.8416271805763245,
"learning_rate": 1.3387641639076165e-06,
"loss": 0.5461,
"step": 12133
},
{
"epoch": 0.77,
"grad_norm": 0.8772505521774292,
"learning_rate": 1.3380654993076054e-06,
"loss": 0.5332,
"step": 12134
},
{
"epoch": 0.77,
"grad_norm": 0.824565589427948,
"learning_rate": 1.3373669888989167e-06,
"loss": 0.5231,
"step": 12135
},
{
"epoch": 0.77,
"grad_norm": 0.8874905109405518,
"learning_rate": 1.3366686327109645e-06,
"loss": 0.5964,
"step": 12136
},
{
"epoch": 0.77,
"grad_norm": 0.9241152405738831,
"learning_rate": 1.3359704307731491e-06,
"loss": 0.563,
"step": 12137
},
{
"epoch": 0.77,
"grad_norm": 0.9180853366851807,
"learning_rate": 1.3352723831148761e-06,
"loss": 0.5685,
"step": 12138
},
{
"epoch": 0.77,
"grad_norm": 0.9550989270210266,
"learning_rate": 1.3345744897655327e-06,
"loss": 0.5835,
"step": 12139
},
{
"epoch": 0.77,
"grad_norm": 0.8818813562393188,
"learning_rate": 1.3338767507545064e-06,
"loss": 0.5739,
"step": 12140
},
{
"epoch": 0.77,
"grad_norm": 0.8973625302314758,
"learning_rate": 1.3331791661111765e-06,
"loss": 0.5607,
"step": 12141
},
{
"epoch": 0.77,
"grad_norm": 0.9165273904800415,
"learning_rate": 1.3324817358649162e-06,
"loss": 0.5993,
"step": 12142
},
{
"epoch": 0.77,
"grad_norm": 0.8530603647232056,
"learning_rate": 1.3317844600450912e-06,
"loss": 0.5488,
"step": 12143
},
{
"epoch": 0.77,
"grad_norm": 0.9110085964202881,
"learning_rate": 1.3310873386810641e-06,
"loss": 0.6053,
"step": 12144
},
{
"epoch": 0.77,
"grad_norm": 0.8425561189651489,
"learning_rate": 1.330390371802182e-06,
"loss": 0.5784,
"step": 12145
},
{
"epoch": 0.77,
"grad_norm": 0.9364494681358337,
"learning_rate": 1.3296935594377996e-06,
"loss": 0.6112,
"step": 12146
},
{
"epoch": 0.77,
"grad_norm": 0.9249874949455261,
"learning_rate": 1.3289969016172515e-06,
"loss": 0.6136,
"step": 12147
},
{
"epoch": 0.77,
"grad_norm": 0.8937221169471741,
"learning_rate": 1.3283003983698733e-06,
"loss": 0.5601,
"step": 12148
},
{
"epoch": 0.77,
"grad_norm": 0.9098302721977234,
"learning_rate": 1.3276040497249926e-06,
"loss": 0.5666,
"step": 12149
},
{
"epoch": 0.77,
"grad_norm": 0.862511396408081,
"learning_rate": 1.3269078557119297e-06,
"loss": 0.5542,
"step": 12150
},
{
"epoch": 0.77,
"grad_norm": 0.9537906646728516,
"learning_rate": 1.3262118163599992e-06,
"loss": 0.5299,
"step": 12151
},
{
"epoch": 0.77,
"grad_norm": 0.9067496657371521,
"learning_rate": 1.3255159316985105e-06,
"loss": 0.5696,
"step": 12152
},
{
"epoch": 0.77,
"grad_norm": 0.8889273405075073,
"learning_rate": 1.3248202017567624e-06,
"loss": 0.5981,
"step": 12153
},
{
"epoch": 0.77,
"grad_norm": 0.8882769346237183,
"learning_rate": 1.32412462656405e-06,
"loss": 0.61,
"step": 12154
},
{
"epoch": 0.77,
"grad_norm": 0.9011073112487793,
"learning_rate": 1.3234292061496622e-06,
"loss": 0.5695,
"step": 12155
},
{
"epoch": 0.77,
"grad_norm": 0.8439561724662781,
"learning_rate": 1.3227339405428807e-06,
"loss": 0.5706,
"step": 12156
},
{
"epoch": 0.77,
"grad_norm": 0.9296759366989136,
"learning_rate": 1.3220388297729825e-06,
"loss": 0.5967,
"step": 12157
},
{
"epoch": 0.77,
"grad_norm": 0.8398501873016357,
"learning_rate": 1.3213438738692313e-06,
"loss": 0.5288,
"step": 12158
},
{
"epoch": 0.77,
"grad_norm": 0.908816397190094,
"learning_rate": 1.320649072860894e-06,
"loss": 0.5917,
"step": 12159
},
{
"epoch": 0.77,
"grad_norm": 0.90097975730896,
"learning_rate": 1.3199544267772257e-06,
"loss": 0.5321,
"step": 12160
},
{
"epoch": 0.77,
"grad_norm": 0.899578869342804,
"learning_rate": 1.3192599356474733e-06,
"loss": 0.6484,
"step": 12161
},
{
"epoch": 0.77,
"grad_norm": 0.8797482252120972,
"learning_rate": 1.318565599500881e-06,
"loss": 0.6243,
"step": 12162
},
{
"epoch": 0.77,
"grad_norm": 0.873276948928833,
"learning_rate": 1.3178714183666846e-06,
"loss": 0.5852,
"step": 12163
},
{
"epoch": 0.77,
"grad_norm": 0.9565367102622986,
"learning_rate": 1.3171773922741132e-06,
"loss": 0.573,
"step": 12164
},
{
"epoch": 0.77,
"grad_norm": 0.8669590950012207,
"learning_rate": 1.316483521252392e-06,
"loss": 0.6123,
"step": 12165
},
{
"epoch": 0.77,
"grad_norm": 0.7968014478683472,
"learning_rate": 1.3157898053307322e-06,
"loss": 0.5062,
"step": 12166
},
{
"epoch": 0.77,
"grad_norm": 0.9194180369377136,
"learning_rate": 1.3150962445383492e-06,
"loss": 0.5859,
"step": 12167
},
{
"epoch": 0.77,
"grad_norm": 0.8830471038818359,
"learning_rate": 1.314402838904446e-06,
"loss": 0.5655,
"step": 12168
},
{
"epoch": 0.77,
"grad_norm": 0.9183891415596008,
"learning_rate": 1.3137095884582163e-06,
"loss": 0.6396,
"step": 12169
},
{
"epoch": 0.77,
"grad_norm": 0.8405441045761108,
"learning_rate": 1.3130164932288524e-06,
"loss": 0.5072,
"step": 12170
},
{
"epoch": 0.77,
"grad_norm": 0.9240639209747314,
"learning_rate": 1.3123235532455376e-06,
"loss": 0.6048,
"step": 12171
},
{
"epoch": 0.77,
"grad_norm": 0.8928464651107788,
"learning_rate": 1.3116307685374497e-06,
"loss": 0.5657,
"step": 12172
},
{
"epoch": 0.77,
"grad_norm": 0.8829059600830078,
"learning_rate": 1.3109381391337605e-06,
"loss": 0.5598,
"step": 12173
},
{
"epoch": 0.77,
"grad_norm": 0.8686420321464539,
"learning_rate": 1.3102456650636314e-06,
"loss": 0.6097,
"step": 12174
},
{
"epoch": 0.77,
"grad_norm": 0.9173951745033264,
"learning_rate": 1.3095533463562204e-06,
"loss": 0.6205,
"step": 12175
},
{
"epoch": 0.77,
"grad_norm": 0.9148120880126953,
"learning_rate": 1.3088611830406828e-06,
"loss": 0.5677,
"step": 12176
},
{
"epoch": 0.77,
"grad_norm": 0.8983868360519409,
"learning_rate": 1.3081691751461588e-06,
"loss": 0.5204,
"step": 12177
},
{
"epoch": 0.77,
"grad_norm": 0.9133874177932739,
"learning_rate": 1.3074773227017878e-06,
"loss": 0.601,
"step": 12178
},
{
"epoch": 0.77,
"grad_norm": 0.9384349584579468,
"learning_rate": 1.3067856257367018e-06,
"loss": 0.5376,
"step": 12179
},
{
"epoch": 0.77,
"grad_norm": 0.9111471772193909,
"learning_rate": 1.3060940842800247e-06,
"loss": 0.5794,
"step": 12180
},
{
"epoch": 0.77,
"grad_norm": 0.8366988897323608,
"learning_rate": 1.3054026983608776e-06,
"loss": 0.5528,
"step": 12181
},
{
"epoch": 0.77,
"grad_norm": 0.887912392616272,
"learning_rate": 1.3047114680083683e-06,
"loss": 0.544,
"step": 12182
},
{
"epoch": 0.77,
"grad_norm": 0.8583880662918091,
"learning_rate": 1.3040203932516043e-06,
"loss": 0.5857,
"step": 12183
},
{
"epoch": 0.77,
"grad_norm": 0.8937926888465881,
"learning_rate": 1.303329474119684e-06,
"loss": 0.5397,
"step": 12184
},
{
"epoch": 0.77,
"grad_norm": 0.9099065661430359,
"learning_rate": 1.3026387106417e-06,
"loss": 0.5527,
"step": 12185
},
{
"epoch": 0.77,
"grad_norm": 0.8292108774185181,
"learning_rate": 1.301948102846738e-06,
"loss": 0.5656,
"step": 12186
},
{
"epoch": 0.77,
"grad_norm": 0.8275082111358643,
"learning_rate": 1.301257650763878e-06,
"loss": 0.5818,
"step": 12187
},
{
"epoch": 0.77,
"grad_norm": 0.973997950553894,
"learning_rate": 1.3005673544221882e-06,
"loss": 0.6322,
"step": 12188
},
{
"epoch": 0.77,
"grad_norm": 0.8978073596954346,
"learning_rate": 1.299877213850741e-06,
"loss": 0.5376,
"step": 12189
},
{
"epoch": 0.77,
"grad_norm": 0.8951266407966614,
"learning_rate": 1.2991872290785906e-06,
"loss": 0.5989,
"step": 12190
},
{
"epoch": 0.77,
"grad_norm": 1.023902416229248,
"learning_rate": 1.2984974001347922e-06,
"loss": 0.5635,
"step": 12191
},
{
"epoch": 0.77,
"grad_norm": 0.8716408014297485,
"learning_rate": 1.2978077270483913e-06,
"loss": 0.5602,
"step": 12192
},
{
"epoch": 0.77,
"grad_norm": 0.865201473236084,
"learning_rate": 1.2971182098484286e-06,
"loss": 0.5535,
"step": 12193
},
{
"epoch": 0.77,
"grad_norm": 0.9294458031654358,
"learning_rate": 1.2964288485639366e-06,
"loss": 0.5739,
"step": 12194
},
{
"epoch": 0.77,
"grad_norm": 0.9854139089584351,
"learning_rate": 1.2957396432239427e-06,
"loss": 0.5758,
"step": 12195
},
{
"epoch": 0.77,
"grad_norm": 0.9392171502113342,
"learning_rate": 1.2950505938574643e-06,
"loss": 0.5966,
"step": 12196
},
{
"epoch": 0.77,
"grad_norm": 0.9069497585296631,
"learning_rate": 1.2943617004935176e-06,
"loss": 0.5433,
"step": 12197
},
{
"epoch": 0.77,
"grad_norm": 0.9116702079772949,
"learning_rate": 1.2936729631611106e-06,
"loss": 0.596,
"step": 12198
},
{
"epoch": 0.77,
"grad_norm": 0.9324621558189392,
"learning_rate": 1.2929843818892401e-06,
"loss": 0.5372,
"step": 12199
},
{
"epoch": 0.77,
"grad_norm": 0.8757584691047668,
"learning_rate": 1.2922959567069016e-06,
"loss": 0.5684,
"step": 12200
},
{
"epoch": 0.77,
"grad_norm": 0.9068610668182373,
"learning_rate": 1.2916076876430821e-06,
"loss": 0.6232,
"step": 12201
},
{
"epoch": 0.77,
"grad_norm": 0.8894833326339722,
"learning_rate": 1.2909195747267622e-06,
"loss": 0.6127,
"step": 12202
},
{
"epoch": 0.77,
"grad_norm": 0.8847464323043823,
"learning_rate": 1.2902316179869179e-06,
"loss": 0.5741,
"step": 12203
},
{
"epoch": 0.77,
"grad_norm": 0.9438949823379517,
"learning_rate": 1.2895438174525127e-06,
"loss": 0.6046,
"step": 12204
},
{
"epoch": 0.77,
"grad_norm": 0.8600268363952637,
"learning_rate": 1.288856173152509e-06,
"loss": 0.574,
"step": 12205
},
{
"epoch": 0.77,
"grad_norm": 0.9138484001159668,
"learning_rate": 1.2881686851158642e-06,
"loss": 0.6331,
"step": 12206
},
{
"epoch": 0.77,
"grad_norm": 0.8854186534881592,
"learning_rate": 1.287481353371522e-06,
"loss": 0.5998,
"step": 12207
},
{
"epoch": 0.77,
"grad_norm": 0.8981321454048157,
"learning_rate": 1.286794177948425e-06,
"loss": 0.6245,
"step": 12208
},
{
"epoch": 0.77,
"grad_norm": 0.9491480588912964,
"learning_rate": 1.286107158875508e-06,
"loss": 0.6036,
"step": 12209
},
{
"epoch": 0.77,
"grad_norm": 0.8554275631904602,
"learning_rate": 1.285420296181699e-06,
"loss": 0.5711,
"step": 12210
},
{
"epoch": 0.77,
"grad_norm": 0.8955265283584595,
"learning_rate": 1.2847335898959207e-06,
"loss": 0.5782,
"step": 12211
},
{
"epoch": 0.77,
"grad_norm": 0.8879252076148987,
"learning_rate": 1.284047040047085e-06,
"loss": 0.6219,
"step": 12212
},
{
"epoch": 0.77,
"grad_norm": 0.922939121723175,
"learning_rate": 1.2833606466641001e-06,
"loss": 0.6014,
"step": 12213
},
{
"epoch": 0.77,
"grad_norm": 0.8796207904815674,
"learning_rate": 1.282674409775872e-06,
"loss": 0.5638,
"step": 12214
},
{
"epoch": 0.77,
"grad_norm": 0.8656979203224182,
"learning_rate": 1.2819883294112918e-06,
"loss": 0.5692,
"step": 12215
},
{
"epoch": 0.77,
"grad_norm": 0.8584392666816711,
"learning_rate": 1.2813024055992486e-06,
"loss": 0.5548,
"step": 12216
},
{
"epoch": 0.77,
"grad_norm": 0.8941633105278015,
"learning_rate": 1.2806166383686258e-06,
"loss": 0.5758,
"step": 12217
},
{
"epoch": 0.77,
"grad_norm": 0.8649379014968872,
"learning_rate": 1.2799310277482952e-06,
"loss": 0.5681,
"step": 12218
},
{
"epoch": 0.77,
"grad_norm": 0.9806539416313171,
"learning_rate": 1.2792455737671306e-06,
"loss": 0.6265,
"step": 12219
},
{
"epoch": 0.77,
"grad_norm": 0.884787380695343,
"learning_rate": 1.27856027645399e-06,
"loss": 0.5289,
"step": 12220
},
{
"epoch": 0.77,
"grad_norm": 0.8743571043014526,
"learning_rate": 1.27787513583773e-06,
"loss": 0.5108,
"step": 12221
},
{
"epoch": 0.77,
"grad_norm": 0.8686521649360657,
"learning_rate": 1.2771901519471997e-06,
"loss": 0.5756,
"step": 12222
},
{
"epoch": 0.77,
"grad_norm": 0.830317497253418,
"learning_rate": 1.2765053248112414e-06,
"loss": 0.5872,
"step": 12223
},
{
"epoch": 0.77,
"grad_norm": 0.9129568338394165,
"learning_rate": 1.2758206544586909e-06,
"loss": 0.602,
"step": 12224
},
{
"epoch": 0.77,
"grad_norm": 0.9156956076622009,
"learning_rate": 1.2751361409183788e-06,
"loss": 0.6216,
"step": 12225
},
{
"epoch": 0.77,
"grad_norm": 0.831794261932373,
"learning_rate": 1.2744517842191228e-06,
"loss": 0.5739,
"step": 12226
},
{
"epoch": 0.77,
"grad_norm": 0.9067423343658447,
"learning_rate": 1.2737675843897452e-06,
"loss": 0.5913,
"step": 12227
},
{
"epoch": 0.77,
"grad_norm": 0.9277194142341614,
"learning_rate": 1.2730835414590498e-06,
"loss": 0.6272,
"step": 12228
},
{
"epoch": 0.77,
"grad_norm": 0.8326361775398254,
"learning_rate": 1.272399655455842e-06,
"loss": 0.5507,
"step": 12229
},
{
"epoch": 0.77,
"grad_norm": 0.8325486183166504,
"learning_rate": 1.2717159264089185e-06,
"loss": 0.5397,
"step": 12230
},
{
"epoch": 0.77,
"grad_norm": 0.9493119716644287,
"learning_rate": 1.2710323543470648e-06,
"loss": 0.5241,
"step": 12231
},
{
"epoch": 0.77,
"grad_norm": 0.9216598868370056,
"learning_rate": 1.2703489392990682e-06,
"loss": 0.5895,
"step": 12232
},
{
"epoch": 0.78,
"grad_norm": 0.9237011671066284,
"learning_rate": 1.2696656812937047e-06,
"loss": 0.6019,
"step": 12233
},
{
"epoch": 0.78,
"grad_norm": 0.8747559189796448,
"learning_rate": 1.268982580359741e-06,
"loss": 0.5257,
"step": 12234
},
{
"epoch": 0.78,
"grad_norm": 0.877472460269928,
"learning_rate": 1.2682996365259415e-06,
"loss": 0.5535,
"step": 12235
},
{
"epoch": 0.78,
"grad_norm": 0.8679016828536987,
"learning_rate": 1.2676168498210623e-06,
"loss": 0.5842,
"step": 12236
},
{
"epoch": 0.78,
"grad_norm": 0.8848540782928467,
"learning_rate": 1.2669342202738537e-06,
"loss": 0.5703,
"step": 12237
},
{
"epoch": 0.78,
"grad_norm": 0.8749752640724182,
"learning_rate": 1.2662517479130605e-06,
"loss": 0.5588,
"step": 12238
},
{
"epoch": 0.78,
"grad_norm": 0.9668585062026978,
"learning_rate": 1.2655694327674145e-06,
"loss": 0.5812,
"step": 12239
},
{
"epoch": 0.78,
"grad_norm": 0.9180838465690613,
"learning_rate": 1.2648872748656498e-06,
"loss": 0.5711,
"step": 12240
},
{
"epoch": 0.78,
"grad_norm": 0.9085766077041626,
"learning_rate": 1.2642052742364903e-06,
"loss": 0.5718,
"step": 12241
},
{
"epoch": 0.78,
"grad_norm": 0.8867596983909607,
"learning_rate": 1.2635234309086486e-06,
"loss": 0.5779,
"step": 12242
},
{
"epoch": 0.78,
"grad_norm": 0.9551423788070679,
"learning_rate": 1.2628417449108376e-06,
"loss": 0.6215,
"step": 12243
},
{
"epoch": 0.78,
"grad_norm": 0.9479497671127319,
"learning_rate": 1.2621602162717594e-06,
"loss": 0.5509,
"step": 12244
},
{
"epoch": 0.78,
"grad_norm": 0.8776799440383911,
"learning_rate": 1.261478845020112e-06,
"loss": 0.5186,
"step": 12245
},
{
"epoch": 0.78,
"grad_norm": 0.8843742609024048,
"learning_rate": 1.2607976311845865e-06,
"loss": 0.51,
"step": 12246
},
{
"epoch": 0.78,
"grad_norm": 0.8797292709350586,
"learning_rate": 1.2601165747938638e-06,
"loss": 0.5066,
"step": 12247
},
{
"epoch": 0.78,
"grad_norm": 0.9293206930160522,
"learning_rate": 1.2594356758766201e-06,
"loss": 0.542,
"step": 12248
},
{
"epoch": 0.78,
"grad_norm": 0.8314594030380249,
"learning_rate": 1.2587549344615308e-06,
"loss": 0.5139,
"step": 12249
},
{
"epoch": 0.78,
"grad_norm": 0.893222451210022,
"learning_rate": 1.2580743505772553e-06,
"loss": 0.5823,
"step": 12250
},
{
"epoch": 0.78,
"grad_norm": 0.8597607016563416,
"learning_rate": 1.2573939242524508e-06,
"loss": 0.5671,
"step": 12251
},
{
"epoch": 0.78,
"grad_norm": 0.8614668846130371,
"learning_rate": 1.2567136555157694e-06,
"loss": 0.5764,
"step": 12252
},
{
"epoch": 0.78,
"grad_norm": 0.8970388174057007,
"learning_rate": 1.2560335443958533e-06,
"loss": 0.5439,
"step": 12253
},
{
"epoch": 0.78,
"grad_norm": 0.8344459533691406,
"learning_rate": 1.2553535909213422e-06,
"loss": 0.5791,
"step": 12254
},
{
"epoch": 0.78,
"grad_norm": 0.8963201642036438,
"learning_rate": 1.254673795120863e-06,
"loss": 0.578,
"step": 12255
},
{
"epoch": 0.78,
"grad_norm": 0.9341084957122803,
"learning_rate": 1.2539941570230402e-06,
"loss": 0.547,
"step": 12256
},
{
"epoch": 0.78,
"grad_norm": 0.8028890490531921,
"learning_rate": 1.2533146766564946e-06,
"loss": 0.5386,
"step": 12257
},
{
"epoch": 0.78,
"grad_norm": 0.959701657295227,
"learning_rate": 1.252635354049833e-06,
"loss": 0.5884,
"step": 12258
},
{
"epoch": 0.78,
"grad_norm": 0.8597055077552795,
"learning_rate": 1.2519561892316606e-06,
"loss": 0.5718,
"step": 12259
},
{
"epoch": 0.78,
"grad_norm": 0.8948055505752563,
"learning_rate": 1.2512771822305742e-06,
"loss": 0.5758,
"step": 12260
},
{
"epoch": 0.78,
"grad_norm": 0.9038350582122803,
"learning_rate": 1.2505983330751654e-06,
"loss": 0.5624,
"step": 12261
},
{
"epoch": 0.78,
"grad_norm": 0.8576204776763916,
"learning_rate": 1.2499196417940168e-06,
"loss": 0.6144,
"step": 12262
},
{
"epoch": 0.78,
"grad_norm": 0.8939236998558044,
"learning_rate": 1.2492411084157086e-06,
"loss": 0.5711,
"step": 12263
},
{
"epoch": 0.78,
"grad_norm": 0.859489381313324,
"learning_rate": 1.2485627329688076e-06,
"loss": 0.5811,
"step": 12264
},
{
"epoch": 0.78,
"grad_norm": 0.834520161151886,
"learning_rate": 1.2478845154818798e-06,
"loss": 0.5552,
"step": 12265
},
{
"epoch": 0.78,
"grad_norm": 0.9146906733512878,
"learning_rate": 1.2472064559834818e-06,
"loss": 0.6202,
"step": 12266
},
{
"epoch": 0.78,
"grad_norm": 0.8180127739906311,
"learning_rate": 1.2465285545021655e-06,
"loss": 0.5558,
"step": 12267
},
{
"epoch": 0.78,
"grad_norm": 0.8662042021751404,
"learning_rate": 1.2458508110664758e-06,
"loss": 0.5808,
"step": 12268
},
{
"epoch": 0.78,
"grad_norm": 0.8688540458679199,
"learning_rate": 1.2451732257049458e-06,
"loss": 0.6074,
"step": 12269
},
{
"epoch": 0.78,
"grad_norm": 0.8783148527145386,
"learning_rate": 1.2444957984461103e-06,
"loss": 0.5658,
"step": 12270
},
{
"epoch": 0.78,
"grad_norm": 0.8849241733551025,
"learning_rate": 1.243818529318494e-06,
"loss": 0.6209,
"step": 12271
},
{
"epoch": 0.78,
"grad_norm": 0.8112246990203857,
"learning_rate": 1.2431414183506114e-06,
"loss": 0.5052,
"step": 12272
},
{
"epoch": 0.78,
"grad_norm": 0.9490674734115601,
"learning_rate": 1.2424644655709744e-06,
"loss": 0.5935,
"step": 12273
},
{
"epoch": 0.78,
"grad_norm": 0.892665445804596,
"learning_rate": 1.2417876710080872e-06,
"loss": 0.5661,
"step": 12274
},
{
"epoch": 0.78,
"grad_norm": 0.8753570318222046,
"learning_rate": 1.2411110346904471e-06,
"loss": 0.5632,
"step": 12275
},
{
"epoch": 0.78,
"grad_norm": 0.8728823661804199,
"learning_rate": 1.2404345566465464e-06,
"loss": 0.5697,
"step": 12276
},
{
"epoch": 0.78,
"grad_norm": 0.8521443009376526,
"learning_rate": 1.2397582369048672e-06,
"loss": 0.5444,
"step": 12277
},
{
"epoch": 0.78,
"grad_norm": 0.8864396214485168,
"learning_rate": 1.2390820754938859e-06,
"loss": 0.6033,
"step": 12278
},
{
"epoch": 0.78,
"grad_norm": 0.9159855842590332,
"learning_rate": 1.2384060724420776e-06,
"loss": 0.6399,
"step": 12279
},
{
"epoch": 0.78,
"grad_norm": 0.8791429400444031,
"learning_rate": 1.2377302277779029e-06,
"loss": 0.5495,
"step": 12280
},
{
"epoch": 0.78,
"grad_norm": 0.8864124417304993,
"learning_rate": 1.2370545415298207e-06,
"loss": 0.555,
"step": 12281
},
{
"epoch": 0.78,
"grad_norm": 0.8805786967277527,
"learning_rate": 1.236379013726281e-06,
"loss": 0.576,
"step": 12282
},
{
"epoch": 0.78,
"grad_norm": 0.9043353199958801,
"learning_rate": 1.2357036443957283e-06,
"loss": 0.5943,
"step": 12283
},
{
"epoch": 0.78,
"grad_norm": 0.8610161542892456,
"learning_rate": 1.2350284335666019e-06,
"loss": 0.5871,
"step": 12284
},
{
"epoch": 0.78,
"grad_norm": 0.8844594359397888,
"learning_rate": 1.2343533812673286e-06,
"loss": 0.5531,
"step": 12285
},
{
"epoch": 0.78,
"grad_norm": 0.9110302925109863,
"learning_rate": 1.2336784875263341e-06,
"loss": 0.5183,
"step": 12286
},
{
"epoch": 0.78,
"grad_norm": 0.9357644319534302,
"learning_rate": 1.233003752372039e-06,
"loss": 0.5973,
"step": 12287
},
{
"epoch": 0.78,
"grad_norm": 0.865262508392334,
"learning_rate": 1.23232917583285e-06,
"loss": 0.5508,
"step": 12288
},
{
"epoch": 0.78,
"grad_norm": 0.8521873354911804,
"learning_rate": 1.2316547579371724e-06,
"loss": 0.5563,
"step": 12289
},
{
"epoch": 0.78,
"grad_norm": 0.8528589010238647,
"learning_rate": 1.230980498713404e-06,
"loss": 0.5659,
"step": 12290
},
{
"epoch": 0.78,
"grad_norm": 0.9339171648025513,
"learning_rate": 1.2303063981899355e-06,
"loss": 0.5992,
"step": 12291
},
{
"epoch": 0.78,
"grad_norm": 0.8944584131240845,
"learning_rate": 1.2296324563951517e-06,
"loss": 0.5916,
"step": 12292
},
{
"epoch": 0.78,
"grad_norm": 0.8510122299194336,
"learning_rate": 1.2289586733574283e-06,
"loss": 0.5496,
"step": 12293
},
{
"epoch": 0.78,
"grad_norm": 0.9237475991249084,
"learning_rate": 1.2282850491051363e-06,
"loss": 0.5481,
"step": 12294
},
{
"epoch": 0.78,
"grad_norm": 0.8931830525398254,
"learning_rate": 1.2276115836666396e-06,
"loss": 0.5749,
"step": 12295
},
{
"epoch": 0.78,
"grad_norm": 0.9052478671073914,
"learning_rate": 1.2269382770702964e-06,
"loss": 0.5891,
"step": 12296
},
{
"epoch": 0.78,
"grad_norm": 0.8357118964195251,
"learning_rate": 1.2262651293444572e-06,
"loss": 0.552,
"step": 12297
},
{
"epoch": 0.78,
"grad_norm": 0.8756887912750244,
"learning_rate": 1.2255921405174664e-06,
"loss": 0.5766,
"step": 12298
},
{
"epoch": 0.78,
"grad_norm": 0.8769047260284424,
"learning_rate": 1.2249193106176578e-06,
"loss": 0.5404,
"step": 12299
},
{
"epoch": 0.78,
"grad_norm": 0.7944373488426208,
"learning_rate": 1.224246639673367e-06,
"loss": 0.5492,
"step": 12300
},
{
"epoch": 0.78,
"grad_norm": 0.904240071773529,
"learning_rate": 1.2235741277129143e-06,
"loss": 0.5788,
"step": 12301
},
{
"epoch": 0.78,
"grad_norm": 0.8664789795875549,
"learning_rate": 1.2229017747646178e-06,
"loss": 0.5623,
"step": 12302
},
{
"epoch": 0.78,
"grad_norm": 0.9359251260757446,
"learning_rate": 1.2222295808567874e-06,
"loss": 0.5974,
"step": 12303
},
{
"epoch": 0.78,
"grad_norm": 0.9124470353126526,
"learning_rate": 1.2215575460177282e-06,
"loss": 0.5919,
"step": 12304
},
{
"epoch": 0.78,
"grad_norm": 0.8797950148582458,
"learning_rate": 1.220885670275736e-06,
"loss": 0.5813,
"step": 12305
},
{
"epoch": 0.78,
"grad_norm": 0.9038450717926025,
"learning_rate": 1.2202139536591035e-06,
"loss": 0.5688,
"step": 12306
},
{
"epoch": 0.78,
"grad_norm": 0.8724334239959717,
"learning_rate": 1.2195423961961089e-06,
"loss": 0.5828,
"step": 12307
},
{
"epoch": 0.78,
"grad_norm": 0.8944981098175049,
"learning_rate": 1.2188709979150366e-06,
"loss": 0.589,
"step": 12308
},
{
"epoch": 0.78,
"grad_norm": 0.9792714715003967,
"learning_rate": 1.2181997588441507e-06,
"loss": 0.591,
"step": 12309
},
{
"epoch": 0.78,
"grad_norm": 0.8511162400245667,
"learning_rate": 1.2175286790117174e-06,
"loss": 0.5557,
"step": 12310
},
{
"epoch": 0.78,
"grad_norm": 0.8353309035301208,
"learning_rate": 1.2168577584459944e-06,
"loss": 0.5897,
"step": 12311
},
{
"epoch": 0.78,
"grad_norm": 0.9010828733444214,
"learning_rate": 1.2161869971752283e-06,
"loss": 0.6036,
"step": 12312
},
{
"epoch": 0.78,
"grad_norm": 0.9711151719093323,
"learning_rate": 1.2155163952276654e-06,
"loss": 0.626,
"step": 12313
},
{
"epoch": 0.78,
"grad_norm": 0.8932445049285889,
"learning_rate": 1.2148459526315442e-06,
"loss": 0.6017,
"step": 12314
},
{
"epoch": 0.78,
"grad_norm": 0.8985554575920105,
"learning_rate": 1.2141756694150903e-06,
"loss": 0.5927,
"step": 12315
},
{
"epoch": 0.78,
"grad_norm": 0.8908417820930481,
"learning_rate": 1.2135055456065292e-06,
"loss": 0.5449,
"step": 12316
},
{
"epoch": 0.78,
"grad_norm": 0.9284818768501282,
"learning_rate": 1.2128355812340776e-06,
"loss": 0.583,
"step": 12317
},
{
"epoch": 0.78,
"grad_norm": 0.9066043496131897,
"learning_rate": 1.2121657763259448e-06,
"loss": 0.5134,
"step": 12318
},
{
"epoch": 0.78,
"grad_norm": 0.8915185332298279,
"learning_rate": 1.211496130910334e-06,
"loss": 0.5315,
"step": 12319
},
{
"epoch": 0.78,
"grad_norm": 0.8998283743858337,
"learning_rate": 1.2108266450154422e-06,
"loss": 0.5922,
"step": 12320
},
{
"epoch": 0.78,
"grad_norm": 0.9139837622642517,
"learning_rate": 1.2101573186694587e-06,
"loss": 0.5897,
"step": 12321
},
{
"epoch": 0.78,
"grad_norm": 0.8933039307594299,
"learning_rate": 1.209488151900568e-06,
"loss": 0.5763,
"step": 12322
},
{
"epoch": 0.78,
"grad_norm": 0.9665024876594543,
"learning_rate": 1.2088191447369436e-06,
"loss": 0.6145,
"step": 12323
},
{
"epoch": 0.78,
"grad_norm": 0.9376837015151978,
"learning_rate": 1.2081502972067567e-06,
"loss": 0.5251,
"step": 12324
},
{
"epoch": 0.78,
"grad_norm": 0.8636725544929504,
"learning_rate": 1.2074816093381696e-06,
"loss": 0.5737,
"step": 12325
},
{
"epoch": 0.78,
"grad_norm": 0.8847807049751282,
"learning_rate": 1.2068130811593387e-06,
"loss": 0.5633,
"step": 12326
},
{
"epoch": 0.78,
"grad_norm": 0.8365536332130432,
"learning_rate": 1.2061447126984138e-06,
"loss": 0.5499,
"step": 12327
},
{
"epoch": 0.78,
"grad_norm": 0.9122150540351868,
"learning_rate": 1.2054765039835382e-06,
"loss": 0.5775,
"step": 12328
},
{
"epoch": 0.78,
"grad_norm": 0.8604024052619934,
"learning_rate": 1.2048084550428442e-06,
"loss": 0.5087,
"step": 12329
},
{
"epoch": 0.78,
"grad_norm": 0.9017311334609985,
"learning_rate": 1.2041405659044664e-06,
"loss": 0.5894,
"step": 12330
},
{
"epoch": 0.78,
"grad_norm": 0.8747026324272156,
"learning_rate": 1.203472836596523e-06,
"loss": 0.5661,
"step": 12331
},
{
"epoch": 0.78,
"grad_norm": 0.8790405988693237,
"learning_rate": 1.2028052671471318e-06,
"loss": 0.6278,
"step": 12332
},
{
"epoch": 0.78,
"grad_norm": 0.8900148868560791,
"learning_rate": 1.2021378575844005e-06,
"loss": 0.5758,
"step": 12333
},
{
"epoch": 0.78,
"grad_norm": 0.85635906457901,
"learning_rate": 1.201470607936433e-06,
"loss": 0.5376,
"step": 12334
},
{
"epoch": 0.78,
"grad_norm": 0.9126538038253784,
"learning_rate": 1.2008035182313237e-06,
"loss": 0.5883,
"step": 12335
},
{
"epoch": 0.78,
"grad_norm": 0.8496696949005127,
"learning_rate": 1.2001365884971634e-06,
"loss": 0.5212,
"step": 12336
},
{
"epoch": 0.78,
"grad_norm": 0.9417339563369751,
"learning_rate": 1.1994698187620297e-06,
"loss": 0.6001,
"step": 12337
},
{
"epoch": 0.78,
"grad_norm": 0.9529904127120972,
"learning_rate": 1.1988032090540036e-06,
"loss": 0.6143,
"step": 12338
},
{
"epoch": 0.78,
"grad_norm": 0.8431176543235779,
"learning_rate": 1.1981367594011496e-06,
"loss": 0.5776,
"step": 12339
},
{
"epoch": 0.78,
"grad_norm": 0.8899187445640564,
"learning_rate": 1.1974704698315309e-06,
"loss": 0.5197,
"step": 12340
},
{
"epoch": 0.78,
"grad_norm": 0.8935467004776001,
"learning_rate": 1.1968043403732044e-06,
"loss": 0.5667,
"step": 12341
},
{
"epoch": 0.78,
"grad_norm": 0.9127129316329956,
"learning_rate": 1.1961383710542135e-06,
"loss": 0.5768,
"step": 12342
},
{
"epoch": 0.78,
"grad_norm": 0.8471307754516602,
"learning_rate": 1.1954725619026048e-06,
"loss": 0.5569,
"step": 12343
},
{
"epoch": 0.78,
"grad_norm": 0.7840201258659363,
"learning_rate": 1.1948069129464128e-06,
"loss": 0.572,
"step": 12344
},
{
"epoch": 0.78,
"grad_norm": 0.9100561738014221,
"learning_rate": 1.1941414242136635e-06,
"loss": 0.5413,
"step": 12345
},
{
"epoch": 0.78,
"grad_norm": 0.910991370677948,
"learning_rate": 1.1934760957323782e-06,
"loss": 0.569,
"step": 12346
},
{
"epoch": 0.78,
"grad_norm": 0.9393359422683716,
"learning_rate": 1.1928109275305734e-06,
"loss": 0.6035,
"step": 12347
},
{
"epoch": 0.78,
"grad_norm": 0.8058403134346008,
"learning_rate": 1.1921459196362562e-06,
"loss": 0.5626,
"step": 12348
},
{
"epoch": 0.78,
"grad_norm": 0.8618589639663696,
"learning_rate": 1.1914810720774289e-06,
"loss": 0.5869,
"step": 12349
},
{
"epoch": 0.78,
"grad_norm": 0.924644410610199,
"learning_rate": 1.190816384882082e-06,
"loss": 0.6129,
"step": 12350
},
{
"epoch": 0.78,
"grad_norm": 0.8994777202606201,
"learning_rate": 1.1901518580782073e-06,
"loss": 0.5791,
"step": 12351
},
{
"epoch": 0.78,
"grad_norm": 0.9266136288642883,
"learning_rate": 1.1894874916937855e-06,
"loss": 0.6058,
"step": 12352
},
{
"epoch": 0.78,
"grad_norm": 0.854226291179657,
"learning_rate": 1.1888232857567888e-06,
"loss": 0.5411,
"step": 12353
},
{
"epoch": 0.78,
"grad_norm": 0.9290048480033875,
"learning_rate": 1.1881592402951853e-06,
"loss": 0.5852,
"step": 12354
},
{
"epoch": 0.78,
"grad_norm": 0.9401707649230957,
"learning_rate": 1.1874953553369351e-06,
"loss": 0.5687,
"step": 12355
},
{
"epoch": 0.78,
"grad_norm": 0.8896312713623047,
"learning_rate": 1.1868316309099937e-06,
"loss": 0.5607,
"step": 12356
},
{
"epoch": 0.78,
"grad_norm": 0.9019138216972351,
"learning_rate": 1.186168067042308e-06,
"loss": 0.6016,
"step": 12357
},
{
"epoch": 0.78,
"grad_norm": 0.8993564248085022,
"learning_rate": 1.1855046637618168e-06,
"loss": 0.6566,
"step": 12358
},
{
"epoch": 0.78,
"grad_norm": 0.9466603994369507,
"learning_rate": 1.1848414210964526e-06,
"loss": 0.6339,
"step": 12359
},
{
"epoch": 0.78,
"grad_norm": 0.9109975099563599,
"learning_rate": 1.1841783390741473e-06,
"loss": 0.5418,
"step": 12360
},
{
"epoch": 0.78,
"grad_norm": 0.963789701461792,
"learning_rate": 1.1835154177228165e-06,
"loss": 0.5697,
"step": 12361
},
{
"epoch": 0.78,
"grad_norm": 0.9295591115951538,
"learning_rate": 1.1828526570703747e-06,
"loss": 0.6339,
"step": 12362
},
{
"epoch": 0.78,
"grad_norm": 0.9075334668159485,
"learning_rate": 1.1821900571447286e-06,
"loss": 0.5751,
"step": 12363
},
{
"epoch": 0.78,
"grad_norm": 0.8823397159576416,
"learning_rate": 1.1815276179737778e-06,
"loss": 0.5023,
"step": 12364
},
{
"epoch": 0.78,
"grad_norm": 0.8924038410186768,
"learning_rate": 1.1808653395854174e-06,
"loss": 0.5934,
"step": 12365
},
{
"epoch": 0.78,
"grad_norm": 0.8544875979423523,
"learning_rate": 1.1802032220075299e-06,
"loss": 0.5516,
"step": 12366
},
{
"epoch": 0.78,
"grad_norm": 0.9305124282836914,
"learning_rate": 1.1795412652679955e-06,
"loss": 0.5981,
"step": 12367
},
{
"epoch": 0.78,
"grad_norm": 0.8870397806167603,
"learning_rate": 1.178879469394691e-06,
"loss": 0.5729,
"step": 12368
},
{
"epoch": 0.78,
"grad_norm": 0.9008588194847107,
"learning_rate": 1.1782178344154776e-06,
"loss": 0.5062,
"step": 12369
},
{
"epoch": 0.78,
"grad_norm": 0.8877758383750916,
"learning_rate": 1.1775563603582162e-06,
"loss": 0.5799,
"step": 12370
},
{
"epoch": 0.78,
"grad_norm": 0.882616400718689,
"learning_rate": 1.1768950472507605e-06,
"loss": 0.5285,
"step": 12371
},
{
"epoch": 0.78,
"grad_norm": 0.9182950854301453,
"learning_rate": 1.1762338951209524e-06,
"loss": 0.6087,
"step": 12372
},
{
"epoch": 0.78,
"grad_norm": 0.9180057048797607,
"learning_rate": 1.1755729039966358e-06,
"loss": 0.5914,
"step": 12373
},
{
"epoch": 0.78,
"grad_norm": 0.8797052502632141,
"learning_rate": 1.174912073905638e-06,
"loss": 0.5412,
"step": 12374
},
{
"epoch": 0.78,
"grad_norm": 0.8645214438438416,
"learning_rate": 1.174251404875787e-06,
"loss": 0.564,
"step": 12375
},
{
"epoch": 0.78,
"grad_norm": 0.8359499573707581,
"learning_rate": 1.1735908969349002e-06,
"loss": 0.546,
"step": 12376
},
{
"epoch": 0.78,
"grad_norm": 0.8409955501556396,
"learning_rate": 1.1729305501107897e-06,
"loss": 0.5585,
"step": 12377
},
{
"epoch": 0.78,
"grad_norm": 0.8773937821388245,
"learning_rate": 1.1722703644312599e-06,
"loss": 0.5409,
"step": 12378
},
{
"epoch": 0.78,
"grad_norm": 0.8302714824676514,
"learning_rate": 1.1716103399241113e-06,
"loss": 0.5417,
"step": 12379
},
{
"epoch": 0.78,
"grad_norm": 0.8925114870071411,
"learning_rate": 1.1709504766171298e-06,
"loss": 0.5455,
"step": 12380
},
{
"epoch": 0.78,
"grad_norm": 0.8733065724372864,
"learning_rate": 1.170290774538107e-06,
"loss": 0.6242,
"step": 12381
},
{
"epoch": 0.78,
"grad_norm": 0.8538454174995422,
"learning_rate": 1.1696312337148152e-06,
"loss": 0.5389,
"step": 12382
},
{
"epoch": 0.78,
"grad_norm": 0.9496648907661438,
"learning_rate": 1.1689718541750278e-06,
"loss": 0.6046,
"step": 12383
},
{
"epoch": 0.78,
"grad_norm": 0.9294766187667847,
"learning_rate": 1.168312635946508e-06,
"loss": 0.6338,
"step": 12384
},
{
"epoch": 0.78,
"grad_norm": 0.8941949605941772,
"learning_rate": 1.1676535790570137e-06,
"loss": 0.5433,
"step": 12385
},
{
"epoch": 0.78,
"grad_norm": 0.9269332885742188,
"learning_rate": 1.1669946835342956e-06,
"loss": 0.5374,
"step": 12386
},
{
"epoch": 0.78,
"grad_norm": 0.9914242029190063,
"learning_rate": 1.1663359494060983e-06,
"loss": 0.6343,
"step": 12387
},
{
"epoch": 0.78,
"grad_norm": 0.8914863467216492,
"learning_rate": 1.1656773767001566e-06,
"loss": 0.6055,
"step": 12388
},
{
"epoch": 0.78,
"grad_norm": 0.9513469934463501,
"learning_rate": 1.1650189654442024e-06,
"loss": 0.5674,
"step": 12389
},
{
"epoch": 0.78,
"grad_norm": 0.891490638256073,
"learning_rate": 1.1643607156659582e-06,
"loss": 0.5888,
"step": 12390
},
{
"epoch": 0.79,
"grad_norm": 0.9232016205787659,
"learning_rate": 1.1637026273931413e-06,
"loss": 0.5609,
"step": 12391
},
{
"epoch": 0.79,
"grad_norm": 0.9406624436378479,
"learning_rate": 1.1630447006534606e-06,
"loss": 0.5978,
"step": 12392
},
{
"epoch": 0.79,
"grad_norm": 0.9257485866546631,
"learning_rate": 1.1623869354746203e-06,
"loss": 0.5876,
"step": 12393
},
{
"epoch": 0.79,
"grad_norm": 0.985722541809082,
"learning_rate": 1.1617293318843164e-06,
"loss": 0.6456,
"step": 12394
},
{
"epoch": 0.79,
"grad_norm": 0.8931677341461182,
"learning_rate": 1.1610718899102392e-06,
"loss": 0.5579,
"step": 12395
},
{
"epoch": 0.79,
"grad_norm": 0.9458318948745728,
"learning_rate": 1.1604146095800684e-06,
"loss": 0.5987,
"step": 12396
},
{
"epoch": 0.79,
"grad_norm": 0.8708502650260925,
"learning_rate": 1.1597574909214808e-06,
"loss": 0.6126,
"step": 12397
},
{
"epoch": 0.79,
"grad_norm": 0.937856912612915,
"learning_rate": 1.159100533962147e-06,
"loss": 0.5574,
"step": 12398
},
{
"epoch": 0.79,
"grad_norm": 0.9945967793464661,
"learning_rate": 1.1584437387297283e-06,
"loss": 0.6743,
"step": 12399
},
{
"epoch": 0.79,
"grad_norm": 0.8693322539329529,
"learning_rate": 1.157787105251879e-06,
"loss": 0.5495,
"step": 12400
},
{
"epoch": 0.79,
"grad_norm": 0.8383262157440186,
"learning_rate": 1.157130633556251e-06,
"loss": 0.4967,
"step": 12401
},
{
"epoch": 0.79,
"grad_norm": 0.9006306529045105,
"learning_rate": 1.1564743236704801e-06,
"loss": 0.5194,
"step": 12402
},
{
"epoch": 0.79,
"grad_norm": 0.909695565700531,
"learning_rate": 1.1558181756222081e-06,
"loss": 0.5596,
"step": 12403
},
{
"epoch": 0.79,
"grad_norm": 0.9043020009994507,
"learning_rate": 1.1551621894390586e-06,
"loss": 0.5476,
"step": 12404
},
{
"epoch": 0.79,
"grad_norm": 0.9523054957389832,
"learning_rate": 1.1545063651486533e-06,
"loss": 0.6002,
"step": 12405
},
{
"epoch": 0.79,
"grad_norm": 0.8855312466621399,
"learning_rate": 1.1538507027786077e-06,
"loss": 0.6329,
"step": 12406
},
{
"epoch": 0.79,
"grad_norm": 0.8605347275733948,
"learning_rate": 1.1531952023565295e-06,
"loss": 0.5569,
"step": 12407
},
{
"epoch": 0.79,
"grad_norm": 0.8833476901054382,
"learning_rate": 1.1525398639100194e-06,
"loss": 0.5713,
"step": 12408
},
{
"epoch": 0.79,
"grad_norm": 0.9746968746185303,
"learning_rate": 1.1518846874666723e-06,
"loss": 0.5846,
"step": 12409
},
{
"epoch": 0.79,
"grad_norm": 0.9625856280326843,
"learning_rate": 1.1512296730540717e-06,
"loss": 0.5859,
"step": 12410
},
{
"epoch": 0.79,
"grad_norm": 0.8508451581001282,
"learning_rate": 1.1505748206998036e-06,
"loss": 0.5348,
"step": 12411
},
{
"epoch": 0.79,
"grad_norm": 0.8078241944313049,
"learning_rate": 1.1499201304314372e-06,
"loss": 0.5757,
"step": 12412
},
{
"epoch": 0.79,
"grad_norm": 0.9019641280174255,
"learning_rate": 1.149265602276541e-06,
"loss": 0.6191,
"step": 12413
},
{
"epoch": 0.79,
"grad_norm": 0.9142687320709229,
"learning_rate": 1.1486112362626738e-06,
"loss": 0.6163,
"step": 12414
},
{
"epoch": 0.79,
"grad_norm": 0.8982365131378174,
"learning_rate": 1.14795703241739e-06,
"loss": 0.5845,
"step": 12415
},
{
"epoch": 0.79,
"grad_norm": 0.8687730431556702,
"learning_rate": 1.1473029907682348e-06,
"loss": 0.497,
"step": 12416
},
{
"epoch": 0.79,
"grad_norm": 0.9555364847183228,
"learning_rate": 1.1466491113427503e-06,
"loss": 0.5448,
"step": 12417
},
{
"epoch": 0.79,
"grad_norm": 0.9418555498123169,
"learning_rate": 1.1459953941684648e-06,
"loss": 0.5793,
"step": 12418
},
{
"epoch": 0.79,
"grad_norm": 0.8871247172355652,
"learning_rate": 1.1453418392729065e-06,
"loss": 0.5731,
"step": 12419
},
{
"epoch": 0.79,
"grad_norm": 0.9137433171272278,
"learning_rate": 1.1446884466835933e-06,
"loss": 0.6143,
"step": 12420
},
{
"epoch": 0.79,
"grad_norm": 0.9435691237449646,
"learning_rate": 1.1440352164280388e-06,
"loss": 0.5642,
"step": 12421
},
{
"epoch": 0.79,
"grad_norm": 0.8553792834281921,
"learning_rate": 1.1433821485337487e-06,
"loss": 0.55,
"step": 12422
},
{
"epoch": 0.79,
"grad_norm": 0.8792483806610107,
"learning_rate": 1.1427292430282165e-06,
"loss": 0.5809,
"step": 12423
},
{
"epoch": 0.79,
"grad_norm": 0.8634614944458008,
"learning_rate": 1.14207649993894e-06,
"loss": 0.5819,
"step": 12424
},
{
"epoch": 0.79,
"grad_norm": 0.904400110244751,
"learning_rate": 1.1414239192934019e-06,
"loss": 0.5542,
"step": 12425
},
{
"epoch": 0.79,
"grad_norm": 0.9054979085922241,
"learning_rate": 1.1407715011190784e-06,
"loss": 0.5524,
"step": 12426
},
{
"epoch": 0.79,
"grad_norm": 0.9589418768882751,
"learning_rate": 1.1401192454434418e-06,
"loss": 0.5837,
"step": 12427
},
{
"epoch": 0.79,
"grad_norm": 0.9193122386932373,
"learning_rate": 1.139467152293956e-06,
"loss": 0.5805,
"step": 12428
},
{
"epoch": 0.79,
"grad_norm": 0.9566795825958252,
"learning_rate": 1.138815221698079e-06,
"loss": 0.6101,
"step": 12429
},
{
"epoch": 0.79,
"grad_norm": 0.8651120662689209,
"learning_rate": 1.138163453683262e-06,
"loss": 0.5513,
"step": 12430
},
{
"epoch": 0.79,
"grad_norm": 0.8350364565849304,
"learning_rate": 1.1375118482769447e-06,
"loss": 0.5298,
"step": 12431
},
{
"epoch": 0.79,
"grad_norm": 0.8395058512687683,
"learning_rate": 1.136860405506569e-06,
"loss": 0.5459,
"step": 12432
},
{
"epoch": 0.79,
"grad_norm": 0.8627316951751709,
"learning_rate": 1.1362091253995632e-06,
"loss": 0.5212,
"step": 12433
},
{
"epoch": 0.79,
"grad_norm": 0.8814694285392761,
"learning_rate": 1.1355580079833496e-06,
"loss": 0.5991,
"step": 12434
},
{
"epoch": 0.79,
"grad_norm": 0.8934352397918701,
"learning_rate": 1.134907053285344e-06,
"loss": 0.5512,
"step": 12435
},
{
"epoch": 0.79,
"grad_norm": 0.9029948711395264,
"learning_rate": 1.1342562613329571e-06,
"loss": 0.5956,
"step": 12436
},
{
"epoch": 0.79,
"grad_norm": 0.8928791284561157,
"learning_rate": 1.133605632153591e-06,
"loss": 0.5683,
"step": 12437
},
{
"epoch": 0.79,
"grad_norm": 0.9214066863059998,
"learning_rate": 1.1329551657746429e-06,
"loss": 0.6537,
"step": 12438
},
{
"epoch": 0.79,
"grad_norm": 0.9235839247703552,
"learning_rate": 1.132304862223499e-06,
"loss": 0.587,
"step": 12439
},
{
"epoch": 0.79,
"grad_norm": 0.8503764271736145,
"learning_rate": 1.1316547215275409e-06,
"loss": 0.6163,
"step": 12440
},
{
"epoch": 0.79,
"grad_norm": 0.8700659275054932,
"learning_rate": 1.1310047437141485e-06,
"loss": 0.5875,
"step": 12441
},
{
"epoch": 0.79,
"grad_norm": 0.9010173678398132,
"learning_rate": 1.1303549288106857e-06,
"loss": 0.5572,
"step": 12442
},
{
"epoch": 0.79,
"grad_norm": 0.906274676322937,
"learning_rate": 1.1297052768445154e-06,
"loss": 0.5747,
"step": 12443
},
{
"epoch": 0.79,
"grad_norm": 0.9110028147697449,
"learning_rate": 1.129055787842992e-06,
"loss": 0.5646,
"step": 12444
},
{
"epoch": 0.79,
"grad_norm": 0.9146189093589783,
"learning_rate": 1.1284064618334634e-06,
"loss": 0.5341,
"step": 12445
},
{
"epoch": 0.79,
"grad_norm": 0.9614166021347046,
"learning_rate": 1.1277572988432716e-06,
"loss": 0.5856,
"step": 12446
},
{
"epoch": 0.79,
"grad_norm": 0.8432893753051758,
"learning_rate": 1.1271082988997485e-06,
"loss": 0.5756,
"step": 12447
},
{
"epoch": 0.79,
"grad_norm": 0.9383045434951782,
"learning_rate": 1.1264594620302216e-06,
"loss": 0.6037,
"step": 12448
},
{
"epoch": 0.79,
"grad_norm": 0.8760130405426025,
"learning_rate": 1.1258107882620117e-06,
"loss": 0.5568,
"step": 12449
},
{
"epoch": 0.79,
"grad_norm": 0.9355485439300537,
"learning_rate": 1.1251622776224325e-06,
"loss": 0.5927,
"step": 12450
},
{
"epoch": 0.79,
"grad_norm": 0.9326279759407043,
"learning_rate": 1.1245139301387903e-06,
"loss": 0.5338,
"step": 12451
},
{
"epoch": 0.79,
"grad_norm": 0.9106242060661316,
"learning_rate": 1.1238657458383857e-06,
"loss": 0.5986,
"step": 12452
},
{
"epoch": 0.79,
"grad_norm": 0.9031466841697693,
"learning_rate": 1.1232177247485076e-06,
"loss": 0.5867,
"step": 12453
},
{
"epoch": 0.79,
"grad_norm": 0.8840919137001038,
"learning_rate": 1.122569866896448e-06,
"loss": 0.5369,
"step": 12454
},
{
"epoch": 0.79,
"grad_norm": 0.875639021396637,
"learning_rate": 1.1219221723094815e-06,
"loss": 0.5746,
"step": 12455
},
{
"epoch": 0.79,
"grad_norm": 0.9193491339683533,
"learning_rate": 1.1212746410148807e-06,
"loss": 0.6223,
"step": 12456
},
{
"epoch": 0.79,
"grad_norm": 0.9045842885971069,
"learning_rate": 1.120627273039912e-06,
"loss": 0.6001,
"step": 12457
},
{
"epoch": 0.79,
"grad_norm": 0.9147197008132935,
"learning_rate": 1.119980068411834e-06,
"loss": 0.5921,
"step": 12458
},
{
"epoch": 0.79,
"grad_norm": 0.9671141505241394,
"learning_rate": 1.1193330271578968e-06,
"loss": 0.6136,
"step": 12459
},
{
"epoch": 0.79,
"grad_norm": 0.8843987584114075,
"learning_rate": 1.118686149305348e-06,
"loss": 0.5372,
"step": 12460
},
{
"epoch": 0.79,
"grad_norm": 0.9339917302131653,
"learning_rate": 1.1180394348814206e-06,
"loss": 0.5736,
"step": 12461
},
{
"epoch": 0.79,
"grad_norm": 0.8633296489715576,
"learning_rate": 1.117392883913349e-06,
"loss": 0.5683,
"step": 12462
},
{
"epoch": 0.79,
"grad_norm": 0.856465220451355,
"learning_rate": 1.1167464964283587e-06,
"loss": 0.5533,
"step": 12463
},
{
"epoch": 0.79,
"grad_norm": 0.851737916469574,
"learning_rate": 1.1161002724536623e-06,
"loss": 0.5454,
"step": 12464
},
{
"epoch": 0.79,
"grad_norm": 0.897743821144104,
"learning_rate": 1.115454212016473e-06,
"loss": 0.5616,
"step": 12465
},
{
"epoch": 0.79,
"grad_norm": 0.8601440191268921,
"learning_rate": 1.1148083151439932e-06,
"loss": 0.5598,
"step": 12466
},
{
"epoch": 0.79,
"grad_norm": 0.913490891456604,
"learning_rate": 1.1141625818634194e-06,
"loss": 0.5871,
"step": 12467
},
{
"epoch": 0.79,
"grad_norm": 0.9724521636962891,
"learning_rate": 1.1135170122019433e-06,
"loss": 0.6231,
"step": 12468
},
{
"epoch": 0.79,
"grad_norm": 0.9473910927772522,
"learning_rate": 1.112871606186744e-06,
"loss": 0.5913,
"step": 12469
},
{
"epoch": 0.79,
"grad_norm": 0.902228593826294,
"learning_rate": 1.112226363844998e-06,
"loss": 0.6149,
"step": 12470
},
{
"epoch": 0.79,
"grad_norm": 0.8123179078102112,
"learning_rate": 1.1115812852038777e-06,
"loss": 0.4786,
"step": 12471
},
{
"epoch": 0.79,
"grad_norm": 0.8619558811187744,
"learning_rate": 1.1109363702905419e-06,
"loss": 0.6023,
"step": 12472
},
{
"epoch": 0.79,
"grad_norm": 0.9255017042160034,
"learning_rate": 1.1102916191321456e-06,
"loss": 0.5984,
"step": 12473
},
{
"epoch": 0.79,
"grad_norm": 0.8794713616371155,
"learning_rate": 1.1096470317558384e-06,
"loss": 0.5654,
"step": 12474
},
{
"epoch": 0.79,
"grad_norm": 0.9066189527511597,
"learning_rate": 1.1090026081887611e-06,
"loss": 0.5686,
"step": 12475
},
{
"epoch": 0.79,
"grad_norm": 0.8906106352806091,
"learning_rate": 1.1083583484580495e-06,
"loss": 0.5434,
"step": 12476
},
{
"epoch": 0.79,
"grad_norm": 0.8132662177085876,
"learning_rate": 1.107714252590828e-06,
"loss": 0.5387,
"step": 12477
},
{
"epoch": 0.79,
"grad_norm": 0.8702597618103027,
"learning_rate": 1.1070703206142186e-06,
"loss": 0.5839,
"step": 12478
},
{
"epoch": 0.79,
"grad_norm": 0.9468421339988708,
"learning_rate": 1.1064265525553375e-06,
"loss": 0.6534,
"step": 12479
},
{
"epoch": 0.79,
"grad_norm": 0.8762499690055847,
"learning_rate": 1.1057829484412885e-06,
"loss": 0.5579,
"step": 12480
},
{
"epoch": 0.79,
"grad_norm": 0.9076083302497864,
"learning_rate": 1.1051395082991722e-06,
"loss": 0.5715,
"step": 12481
},
{
"epoch": 0.79,
"grad_norm": 0.8767365217208862,
"learning_rate": 1.1044962321560837e-06,
"loss": 0.5713,
"step": 12482
},
{
"epoch": 0.79,
"grad_norm": 0.864399254322052,
"learning_rate": 1.1038531200391045e-06,
"loss": 0.6256,
"step": 12483
},
{
"epoch": 0.79,
"grad_norm": 0.8748794794082642,
"learning_rate": 1.1032101719753197e-06,
"loss": 0.5357,
"step": 12484
},
{
"epoch": 0.79,
"grad_norm": 0.8488878607749939,
"learning_rate": 1.102567387991797e-06,
"loss": 0.5536,
"step": 12485
},
{
"epoch": 0.79,
"grad_norm": 0.8816309571266174,
"learning_rate": 1.101924768115603e-06,
"loss": 0.5564,
"step": 12486
},
{
"epoch": 0.79,
"grad_norm": 0.8800366520881653,
"learning_rate": 1.101282312373797e-06,
"loss": 0.5739,
"step": 12487
},
{
"epoch": 0.79,
"grad_norm": 0.9694901704788208,
"learning_rate": 1.1006400207934304e-06,
"loss": 0.6166,
"step": 12488
},
{
"epoch": 0.79,
"grad_norm": 0.9245271682739258,
"learning_rate": 1.0999978934015475e-06,
"loss": 0.5898,
"step": 12489
},
{
"epoch": 0.79,
"grad_norm": 0.9495474100112915,
"learning_rate": 1.0993559302251878e-06,
"loss": 0.6334,
"step": 12490
},
{
"epoch": 0.79,
"grad_norm": 0.9290176630020142,
"learning_rate": 1.0987141312913773e-06,
"loss": 0.583,
"step": 12491
},
{
"epoch": 0.79,
"grad_norm": 0.9221222400665283,
"learning_rate": 1.098072496627146e-06,
"loss": 0.6511,
"step": 12492
},
{
"epoch": 0.79,
"grad_norm": 0.9218513369560242,
"learning_rate": 1.0974310262595067e-06,
"loss": 0.6061,
"step": 12493
},
{
"epoch": 0.79,
"grad_norm": 0.9661274552345276,
"learning_rate": 1.096789720215471e-06,
"loss": 0.682,
"step": 12494
},
{
"epoch": 0.79,
"grad_norm": 0.9002541303634644,
"learning_rate": 1.0961485785220434e-06,
"loss": 0.5775,
"step": 12495
},
{
"epoch": 0.79,
"grad_norm": 0.8728871941566467,
"learning_rate": 1.0955076012062155e-06,
"loss": 0.6046,
"step": 12496
},
{
"epoch": 0.79,
"grad_norm": 0.9101660251617432,
"learning_rate": 1.094866788294981e-06,
"loss": 0.5323,
"step": 12497
},
{
"epoch": 0.79,
"grad_norm": 0.8614184856414795,
"learning_rate": 1.094226139815323e-06,
"loss": 0.5448,
"step": 12498
},
{
"epoch": 0.79,
"grad_norm": 0.934859037399292,
"learning_rate": 1.0935856557942132e-06,
"loss": 0.5444,
"step": 12499
},
{
"epoch": 0.79,
"grad_norm": 0.8698869943618774,
"learning_rate": 1.0929453362586223e-06,
"loss": 0.5316,
"step": 12500
},
{
"epoch": 0.79,
"grad_norm": 0.941087007522583,
"learning_rate": 1.0923051812355117e-06,
"loss": 0.6492,
"step": 12501
},
{
"epoch": 0.79,
"grad_norm": 0.9395822882652283,
"learning_rate": 1.091665190751836e-06,
"loss": 0.5986,
"step": 12502
},
{
"epoch": 0.79,
"grad_norm": 0.9707981944084167,
"learning_rate": 1.0910253648345442e-06,
"loss": 0.6301,
"step": 12503
},
{
"epoch": 0.79,
"grad_norm": 0.8878294825553894,
"learning_rate": 1.0903857035105736e-06,
"loss": 0.5955,
"step": 12504
},
{
"epoch": 0.79,
"grad_norm": 0.8388591408729553,
"learning_rate": 1.0897462068068616e-06,
"loss": 0.5295,
"step": 12505
},
{
"epoch": 0.79,
"grad_norm": 0.8977577686309814,
"learning_rate": 1.0891068747503353e-06,
"loss": 0.5855,
"step": 12506
},
{
"epoch": 0.79,
"grad_norm": 0.8421580195426941,
"learning_rate": 1.0884677073679123e-06,
"loss": 0.5343,
"step": 12507
},
{
"epoch": 0.79,
"grad_norm": 0.777437150478363,
"learning_rate": 1.0878287046865072e-06,
"loss": 0.5167,
"step": 12508
},
{
"epoch": 0.79,
"grad_norm": 0.8942268490791321,
"learning_rate": 1.0871898667330249e-06,
"loss": 0.5571,
"step": 12509
},
{
"epoch": 0.79,
"grad_norm": 0.8648400902748108,
"learning_rate": 1.0865511935343664e-06,
"loss": 0.5088,
"step": 12510
},
{
"epoch": 0.79,
"grad_norm": 0.8418681621551514,
"learning_rate": 1.0859126851174246e-06,
"loss": 0.5647,
"step": 12511
},
{
"epoch": 0.79,
"grad_norm": 0.9093899130821228,
"learning_rate": 1.0852743415090823e-06,
"loss": 0.5833,
"step": 12512
},
{
"epoch": 0.79,
"grad_norm": 0.8962028622627258,
"learning_rate": 1.0846361627362174e-06,
"loss": 0.5429,
"step": 12513
},
{
"epoch": 0.79,
"grad_norm": 0.8653958439826965,
"learning_rate": 1.0839981488257061e-06,
"loss": 0.5451,
"step": 12514
},
{
"epoch": 0.79,
"grad_norm": 0.8512267470359802,
"learning_rate": 1.0833602998044085e-06,
"loss": 0.5307,
"step": 12515
},
{
"epoch": 0.79,
"grad_norm": 0.8976225852966309,
"learning_rate": 1.0827226156991838e-06,
"loss": 0.605,
"step": 12516
},
{
"epoch": 0.79,
"grad_norm": 0.8847118020057678,
"learning_rate": 1.0820850965368822e-06,
"loss": 0.5519,
"step": 12517
},
{
"epoch": 0.79,
"grad_norm": 0.9687950611114502,
"learning_rate": 1.0814477423443482e-06,
"loss": 0.635,
"step": 12518
},
{
"epoch": 0.79,
"grad_norm": 0.8953339457511902,
"learning_rate": 1.0808105531484192e-06,
"loss": 0.5767,
"step": 12519
},
{
"epoch": 0.79,
"grad_norm": 0.9060798287391663,
"learning_rate": 1.0801735289759225e-06,
"loss": 0.5794,
"step": 12520
},
{
"epoch": 0.79,
"grad_norm": 0.9054228663444519,
"learning_rate": 1.0795366698536812e-06,
"loss": 0.5836,
"step": 12521
},
{
"epoch": 0.79,
"grad_norm": 0.8784095644950867,
"learning_rate": 1.078899975808515e-06,
"loss": 0.5729,
"step": 12522
},
{
"epoch": 0.79,
"grad_norm": 0.947877049446106,
"learning_rate": 1.0782634468672293e-06,
"loss": 0.5233,
"step": 12523
},
{
"epoch": 0.79,
"grad_norm": 0.866150438785553,
"learning_rate": 1.0776270830566266e-06,
"loss": 0.5557,
"step": 12524
},
{
"epoch": 0.79,
"grad_norm": 0.8818299174308777,
"learning_rate": 1.0769908844035032e-06,
"loss": 0.5335,
"step": 12525
},
{
"epoch": 0.79,
"grad_norm": 0.8382863998413086,
"learning_rate": 1.0763548509346461e-06,
"loss": 0.5066,
"step": 12526
},
{
"epoch": 0.79,
"grad_norm": 0.876054584980011,
"learning_rate": 1.0757189826768367e-06,
"loss": 0.5483,
"step": 12527
},
{
"epoch": 0.79,
"grad_norm": 0.9083865284919739,
"learning_rate": 1.075083279656851e-06,
"loss": 0.5776,
"step": 12528
},
{
"epoch": 0.79,
"grad_norm": 0.8849220275878906,
"learning_rate": 1.0744477419014532e-06,
"loss": 0.5793,
"step": 12529
},
{
"epoch": 0.79,
"grad_norm": 0.8889400959014893,
"learning_rate": 1.0738123694374047e-06,
"loss": 0.55,
"step": 12530
},
{
"epoch": 0.79,
"grad_norm": 0.8188003897666931,
"learning_rate": 1.0731771622914595e-06,
"loss": 0.5511,
"step": 12531
},
{
"epoch": 0.79,
"grad_norm": 0.8689089417457581,
"learning_rate": 1.072542120490363e-06,
"loss": 0.5708,
"step": 12532
},
{
"epoch": 0.79,
"grad_norm": 0.8809791803359985,
"learning_rate": 1.0719072440608575e-06,
"loss": 0.5782,
"step": 12533
},
{
"epoch": 0.79,
"grad_norm": 0.8765868544578552,
"learning_rate": 1.0712725330296697e-06,
"loss": 0.564,
"step": 12534
},
{
"epoch": 0.79,
"grad_norm": 0.8565429449081421,
"learning_rate": 1.07063798742353e-06,
"loss": 0.6002,
"step": 12535
},
{
"epoch": 0.79,
"grad_norm": 0.9748111367225647,
"learning_rate": 1.0700036072691566e-06,
"loss": 0.6289,
"step": 12536
},
{
"epoch": 0.79,
"grad_norm": 0.9657660722732544,
"learning_rate": 1.0693693925932585e-06,
"loss": 0.6292,
"step": 12537
},
{
"epoch": 0.79,
"grad_norm": 0.8865155577659607,
"learning_rate": 1.0687353434225418e-06,
"loss": 0.6005,
"step": 12538
},
{
"epoch": 0.79,
"grad_norm": 0.9077226519584656,
"learning_rate": 1.0681014597837042e-06,
"loss": 0.599,
"step": 12539
},
{
"epoch": 0.79,
"grad_norm": 0.8932228088378906,
"learning_rate": 1.0674677417034358e-06,
"loss": 0.5546,
"step": 12540
},
{
"epoch": 0.79,
"grad_norm": 0.8195880651473999,
"learning_rate": 1.0668341892084217e-06,
"loss": 0.5231,
"step": 12541
},
{
"epoch": 0.79,
"grad_norm": 0.9403937458992004,
"learning_rate": 1.0662008023253356e-06,
"loss": 0.5589,
"step": 12542
},
{
"epoch": 0.79,
"grad_norm": 0.9366670250892639,
"learning_rate": 1.0655675810808485e-06,
"loss": 0.6032,
"step": 12543
},
{
"epoch": 0.79,
"grad_norm": 0.8904660940170288,
"learning_rate": 1.0649345255016258e-06,
"loss": 0.6044,
"step": 12544
},
{
"epoch": 0.79,
"grad_norm": 0.9282307624816895,
"learning_rate": 1.0643016356143204e-06,
"loss": 0.624,
"step": 12545
},
{
"epoch": 0.79,
"grad_norm": 0.8701701164245605,
"learning_rate": 1.0636689114455811e-06,
"loss": 0.6007,
"step": 12546
},
{
"epoch": 0.79,
"grad_norm": 0.8869695067405701,
"learning_rate": 1.063036353022051e-06,
"loss": 0.6469,
"step": 12547
},
{
"epoch": 0.79,
"grad_norm": 0.8800140619277954,
"learning_rate": 1.0624039603703645e-06,
"loss": 0.509,
"step": 12548
},
{
"epoch": 0.8,
"grad_norm": 0.8784294724464417,
"learning_rate": 1.06177173351715e-06,
"loss": 0.5827,
"step": 12549
},
{
"epoch": 0.8,
"grad_norm": 0.9010189771652222,
"learning_rate": 1.061139672489027e-06,
"loss": 0.5904,
"step": 12550
},
{
"epoch": 0.8,
"grad_norm": 0.9229983687400818,
"learning_rate": 1.0605077773126083e-06,
"loss": 0.5881,
"step": 12551
},
{
"epoch": 0.8,
"grad_norm": 0.863856852054596,
"learning_rate": 1.059876048014506e-06,
"loss": 0.5963,
"step": 12552
},
{
"epoch": 0.8,
"grad_norm": 0.8975127935409546,
"learning_rate": 1.0592444846213145e-06,
"loss": 0.597,
"step": 12553
},
{
"epoch": 0.8,
"grad_norm": 0.8481269478797913,
"learning_rate": 1.058613087159629e-06,
"loss": 0.5336,
"step": 12554
},
{
"epoch": 0.8,
"grad_norm": 0.8915879130363464,
"learning_rate": 1.0579818556560357e-06,
"loss": 0.6215,
"step": 12555
},
{
"epoch": 0.8,
"grad_norm": 0.9215599298477173,
"learning_rate": 1.0573507901371126e-06,
"loss": 0.5748,
"step": 12556
},
{
"epoch": 0.8,
"grad_norm": 0.8889424800872803,
"learning_rate": 1.0567198906294341e-06,
"loss": 0.5658,
"step": 12557
},
{
"epoch": 0.8,
"grad_norm": 0.8827781081199646,
"learning_rate": 1.0560891571595616e-06,
"loss": 0.5804,
"step": 12558
},
{
"epoch": 0.8,
"grad_norm": 0.8699508905410767,
"learning_rate": 1.0554585897540553e-06,
"loss": 0.5575,
"step": 12559
},
{
"epoch": 0.8,
"grad_norm": 0.9525449872016907,
"learning_rate": 1.0548281884394657e-06,
"loss": 0.5723,
"step": 12560
},
{
"epoch": 0.8,
"grad_norm": 0.868190348148346,
"learning_rate": 1.0541979532423362e-06,
"loss": 0.5423,
"step": 12561
},
{
"epoch": 0.8,
"grad_norm": 0.8393450975418091,
"learning_rate": 1.053567884189205e-06,
"loss": 0.5703,
"step": 12562
},
{
"epoch": 0.8,
"grad_norm": 0.8783072829246521,
"learning_rate": 1.0529379813066026e-06,
"loss": 0.5426,
"step": 12563
},
{
"epoch": 0.8,
"grad_norm": 0.871197521686554,
"learning_rate": 1.0523082446210487e-06,
"loss": 0.5194,
"step": 12564
},
{
"epoch": 0.8,
"grad_norm": 0.9241814017295837,
"learning_rate": 1.051678674159064e-06,
"loss": 0.6151,
"step": 12565
},
{
"epoch": 0.8,
"grad_norm": 0.9252364039421082,
"learning_rate": 1.0510492699471536e-06,
"loss": 0.5901,
"step": 12566
},
{
"epoch": 0.8,
"grad_norm": 0.9119340777397156,
"learning_rate": 1.0504200320118214e-06,
"loss": 0.5864,
"step": 12567
},
{
"epoch": 0.8,
"grad_norm": 1.0206106901168823,
"learning_rate": 1.049790960379562e-06,
"loss": 0.6423,
"step": 12568
},
{
"epoch": 0.8,
"grad_norm": 0.9204949140548706,
"learning_rate": 1.0491620550768633e-06,
"loss": 0.5887,
"step": 12569
},
{
"epoch": 0.8,
"grad_norm": 0.9017525911331177,
"learning_rate": 1.048533316130207e-06,
"loss": 0.5684,
"step": 12570
},
{
"epoch": 0.8,
"grad_norm": 0.950289249420166,
"learning_rate": 1.0479047435660671e-06,
"loss": 0.5176,
"step": 12571
},
{
"epoch": 0.8,
"grad_norm": 0.9097518920898438,
"learning_rate": 1.047276337410908e-06,
"loss": 0.5952,
"step": 12572
},
{
"epoch": 0.8,
"grad_norm": 0.8936463594436646,
"learning_rate": 1.0466480976911947e-06,
"loss": 0.6109,
"step": 12573
},
{
"epoch": 0.8,
"grad_norm": 0.8422192931175232,
"learning_rate": 1.0460200244333758e-06,
"loss": 0.5667,
"step": 12574
},
{
"epoch": 0.8,
"grad_norm": 0.8249906301498413,
"learning_rate": 1.0453921176638981e-06,
"loss": 0.5465,
"step": 12575
},
{
"epoch": 0.8,
"grad_norm": 0.8235678672790527,
"learning_rate": 1.044764377409203e-06,
"loss": 0.5726,
"step": 12576
},
{
"epoch": 0.8,
"grad_norm": 1.0038381814956665,
"learning_rate": 1.0441368036957184e-06,
"loss": 0.5896,
"step": 12577
},
{
"epoch": 0.8,
"grad_norm": 0.8613317012786865,
"learning_rate": 1.0435093965498727e-06,
"loss": 0.5448,
"step": 12578
},
{
"epoch": 0.8,
"grad_norm": 0.8612802624702454,
"learning_rate": 1.0428821559980839e-06,
"loss": 0.5702,
"step": 12579
},
{
"epoch": 0.8,
"grad_norm": 0.8284898996353149,
"learning_rate": 1.0422550820667605e-06,
"loss": 0.5696,
"step": 12580
},
{
"epoch": 0.8,
"grad_norm": 0.8467788696289062,
"learning_rate": 1.0416281747823076e-06,
"loss": 0.5265,
"step": 12581
},
{
"epoch": 0.8,
"grad_norm": 0.9856624603271484,
"learning_rate": 1.0410014341711216e-06,
"loss": 0.6143,
"step": 12582
},
{
"epoch": 0.8,
"grad_norm": 0.8942157626152039,
"learning_rate": 1.0403748602595937e-06,
"loss": 0.6032,
"step": 12583
},
{
"epoch": 0.8,
"grad_norm": 0.9039360880851746,
"learning_rate": 1.0397484530741053e-06,
"loss": 0.6271,
"step": 12584
},
{
"epoch": 0.8,
"grad_norm": 0.9321849942207336,
"learning_rate": 1.0391222126410327e-06,
"loss": 0.6062,
"step": 12585
},
{
"epoch": 0.8,
"grad_norm": 0.8697063326835632,
"learning_rate": 1.0384961389867454e-06,
"loss": 0.546,
"step": 12586
},
{
"epoch": 0.8,
"grad_norm": 0.9680486917495728,
"learning_rate": 1.0378702321376054e-06,
"loss": 0.5974,
"step": 12587
},
{
"epoch": 0.8,
"grad_norm": 0.8630591034889221,
"learning_rate": 1.037244492119966e-06,
"loss": 0.5821,
"step": 12588
},
{
"epoch": 0.8,
"grad_norm": 0.8251073360443115,
"learning_rate": 1.036618918960175e-06,
"loss": 0.5908,
"step": 12589
},
{
"epoch": 0.8,
"grad_norm": 0.8866623044013977,
"learning_rate": 1.0359935126845738e-06,
"loss": 0.5549,
"step": 12590
},
{
"epoch": 0.8,
"grad_norm": 0.8582077622413635,
"learning_rate": 1.0353682733194965e-06,
"loss": 0.5637,
"step": 12591
},
{
"epoch": 0.8,
"grad_norm": 0.8953722715377808,
"learning_rate": 1.0347432008912688e-06,
"loss": 0.6297,
"step": 12592
},
{
"epoch": 0.8,
"grad_norm": 0.9254661798477173,
"learning_rate": 1.0341182954262125e-06,
"loss": 0.5432,
"step": 12593
},
{
"epoch": 0.8,
"grad_norm": 0.8666430711746216,
"learning_rate": 1.0334935569506355e-06,
"loss": 0.5653,
"step": 12594
},
{
"epoch": 0.8,
"grad_norm": 0.9944994449615479,
"learning_rate": 1.0328689854908492e-06,
"loss": 0.5792,
"step": 12595
},
{
"epoch": 0.8,
"grad_norm": 0.8367435336112976,
"learning_rate": 1.032244581073148e-06,
"loss": 0.5453,
"step": 12596
},
{
"epoch": 0.8,
"grad_norm": 0.9115063548088074,
"learning_rate": 1.0316203437238242e-06,
"loss": 0.6038,
"step": 12597
},
{
"epoch": 0.8,
"grad_norm": 0.8422768712043762,
"learning_rate": 1.0309962734691632e-06,
"loss": 0.5879,
"step": 12598
},
{
"epoch": 0.8,
"grad_norm": 0.9396683573722839,
"learning_rate": 1.0303723703354418e-06,
"loss": 0.6432,
"step": 12599
},
{
"epoch": 0.8,
"grad_norm": 0.8688830733299255,
"learning_rate": 1.0297486343489304e-06,
"loss": 0.578,
"step": 12600
},
{
"epoch": 0.8,
"grad_norm": 0.8575296401977539,
"learning_rate": 1.0291250655358942e-06,
"loss": 0.5906,
"step": 12601
},
{
"epoch": 0.8,
"grad_norm": 0.8545289635658264,
"learning_rate": 1.0285016639225849e-06,
"loss": 0.5982,
"step": 12602
},
{
"epoch": 0.8,
"grad_norm": 0.8576691150665283,
"learning_rate": 1.0278784295352572e-06,
"loss": 0.589,
"step": 12603
},
{
"epoch": 0.8,
"grad_norm": 0.8619968295097351,
"learning_rate": 1.0272553624001502e-06,
"loss": 0.5483,
"step": 12604
},
{
"epoch": 0.8,
"grad_norm": 0.8848919868469238,
"learning_rate": 1.0266324625434992e-06,
"loss": 0.5735,
"step": 12605
},
{
"epoch": 0.8,
"grad_norm": 0.8641508221626282,
"learning_rate": 1.0260097299915345e-06,
"loss": 0.5249,
"step": 12606
},
{
"epoch": 0.8,
"grad_norm": 0.8593783378601074,
"learning_rate": 1.0253871647704722e-06,
"loss": 0.5686,
"step": 12607
},
{
"epoch": 0.8,
"grad_norm": 0.860919713973999,
"learning_rate": 1.024764766906532e-06,
"loss": 0.5702,
"step": 12608
},
{
"epoch": 0.8,
"grad_norm": 0.9240328669548035,
"learning_rate": 1.0241425364259195e-06,
"loss": 0.6011,
"step": 12609
},
{
"epoch": 0.8,
"grad_norm": 0.8484996557235718,
"learning_rate": 1.0235204733548321e-06,
"loss": 0.5523,
"step": 12610
},
{
"epoch": 0.8,
"grad_norm": 0.8490926027297974,
"learning_rate": 1.022898577719465e-06,
"loss": 0.5931,
"step": 12611
},
{
"epoch": 0.8,
"grad_norm": 0.8484750986099243,
"learning_rate": 1.0222768495460029e-06,
"loss": 0.5313,
"step": 12612
},
{
"epoch": 0.8,
"grad_norm": 0.904883086681366,
"learning_rate": 1.0216552888606256e-06,
"loss": 0.5397,
"step": 12613
},
{
"epoch": 0.8,
"grad_norm": 0.8598143458366394,
"learning_rate": 1.0210338956895054e-06,
"loss": 0.5831,
"step": 12614
},
{
"epoch": 0.8,
"grad_norm": 0.8726117014884949,
"learning_rate": 1.020412670058804e-06,
"loss": 0.5869,
"step": 12615
},
{
"epoch": 0.8,
"grad_norm": 0.9262253642082214,
"learning_rate": 1.0197916119946821e-06,
"loss": 0.607,
"step": 12616
},
{
"epoch": 0.8,
"grad_norm": 0.9659039974212646,
"learning_rate": 1.0191707215232905e-06,
"loss": 0.6243,
"step": 12617
},
{
"epoch": 0.8,
"grad_norm": 0.9009899497032166,
"learning_rate": 1.0185499986707702e-06,
"loss": 0.6024,
"step": 12618
},
{
"epoch": 0.8,
"grad_norm": 0.8743886351585388,
"learning_rate": 1.0179294434632593e-06,
"loss": 0.5578,
"step": 12619
},
{
"epoch": 0.8,
"grad_norm": 0.8483142256736755,
"learning_rate": 1.0173090559268867e-06,
"loss": 0.5586,
"step": 12620
},
{
"epoch": 0.8,
"grad_norm": 0.9112587571144104,
"learning_rate": 1.0166888360877747e-06,
"loss": 0.5717,
"step": 12621
},
{
"epoch": 0.8,
"grad_norm": 0.9113679528236389,
"learning_rate": 1.0160687839720407e-06,
"loss": 0.5826,
"step": 12622
},
{
"epoch": 0.8,
"grad_norm": 0.8618003129959106,
"learning_rate": 1.0154488996057894e-06,
"loss": 0.6087,
"step": 12623
},
{
"epoch": 0.8,
"grad_norm": 0.9165884256362915,
"learning_rate": 1.0148291830151224e-06,
"loss": 0.6375,
"step": 12624
},
{
"epoch": 0.8,
"grad_norm": 0.9440232515335083,
"learning_rate": 1.014209634226138e-06,
"loss": 0.6116,
"step": 12625
},
{
"epoch": 0.8,
"grad_norm": 0.8831072449684143,
"learning_rate": 1.013590253264919e-06,
"loss": 0.6156,
"step": 12626
},
{
"epoch": 0.8,
"grad_norm": 0.8621459603309631,
"learning_rate": 1.0129710401575465e-06,
"loss": 0.587,
"step": 12627
},
{
"epoch": 0.8,
"grad_norm": 0.8391403555870056,
"learning_rate": 1.0123519949300942e-06,
"loss": 0.5753,
"step": 12628
},
{
"epoch": 0.8,
"grad_norm": 0.8815181851387024,
"learning_rate": 1.0117331176086264e-06,
"loss": 0.5571,
"step": 12629
},
{
"epoch": 0.8,
"grad_norm": 0.881256103515625,
"learning_rate": 1.0111144082192048e-06,
"loss": 0.5949,
"step": 12630
},
{
"epoch": 0.8,
"grad_norm": 0.9096524715423584,
"learning_rate": 1.0104958667878778e-06,
"loss": 0.5858,
"step": 12631
},
{
"epoch": 0.8,
"grad_norm": 0.9030601382255554,
"learning_rate": 1.0098774933406903e-06,
"loss": 0.5881,
"step": 12632
},
{
"epoch": 0.8,
"grad_norm": 0.9352519512176514,
"learning_rate": 1.0092592879036834e-06,
"loss": 0.5795,
"step": 12633
},
{
"epoch": 0.8,
"grad_norm": 0.9007598161697388,
"learning_rate": 1.0086412505028836e-06,
"loss": 0.5347,
"step": 12634
},
{
"epoch": 0.8,
"grad_norm": 0.9302735328674316,
"learning_rate": 1.0080233811643158e-06,
"loss": 0.5944,
"step": 12635
},
{
"epoch": 0.8,
"grad_norm": 0.8626806735992432,
"learning_rate": 1.0074056799139981e-06,
"loss": 0.6201,
"step": 12636
},
{
"epoch": 0.8,
"grad_norm": 0.9001949429512024,
"learning_rate": 1.006788146777935e-06,
"loss": 0.5805,
"step": 12637
},
{
"epoch": 0.8,
"grad_norm": 0.9080000519752502,
"learning_rate": 1.0061707817821343e-06,
"loss": 0.5803,
"step": 12638
},
{
"epoch": 0.8,
"grad_norm": 0.8849290013313293,
"learning_rate": 1.0055535849525872e-06,
"loss": 0.5354,
"step": 12639
},
{
"epoch": 0.8,
"grad_norm": 0.9328687787055969,
"learning_rate": 1.004936556315283e-06,
"loss": 0.6155,
"step": 12640
},
{
"epoch": 0.8,
"grad_norm": 0.9027367234230042,
"learning_rate": 1.004319695896202e-06,
"loss": 0.5903,
"step": 12641
},
{
"epoch": 0.8,
"grad_norm": 0.8899877667427063,
"learning_rate": 1.0037030037213197e-06,
"loss": 0.5407,
"step": 12642
},
{
"epoch": 0.8,
"grad_norm": 0.9732675552368164,
"learning_rate": 1.0030864798166013e-06,
"loss": 0.6381,
"step": 12643
},
{
"epoch": 0.8,
"grad_norm": 0.9128854870796204,
"learning_rate": 1.0024701242080082e-06,
"loss": 0.6036,
"step": 12644
},
{
"epoch": 0.8,
"grad_norm": 0.905947744846344,
"learning_rate": 1.0018539369214891e-06,
"loss": 0.5918,
"step": 12645
},
{
"epoch": 0.8,
"grad_norm": 0.8508647084236145,
"learning_rate": 1.0012379179829951e-06,
"loss": 0.5757,
"step": 12646
},
{
"epoch": 0.8,
"grad_norm": 0.9843933582305908,
"learning_rate": 1.0006220674184602e-06,
"loss": 0.6191,
"step": 12647
},
{
"epoch": 0.8,
"grad_norm": 0.9173324704170227,
"learning_rate": 1.0000063852538172e-06,
"loss": 0.5374,
"step": 12648
},
{
"epoch": 0.8,
"grad_norm": 0.9240821599960327,
"learning_rate": 9.993908715149902e-07,
"loss": 0.5682,
"step": 12649
},
{
"epoch": 0.8,
"grad_norm": 0.8344833850860596,
"learning_rate": 9.98775526227897e-07,
"loss": 0.5103,
"step": 12650
},
{
"epoch": 0.8,
"grad_norm": 0.8877370953559875,
"learning_rate": 9.981603494184473e-07,
"loss": 0.6135,
"step": 12651
},
{
"epoch": 0.8,
"grad_norm": 0.9172238111495972,
"learning_rate": 9.975453411125447e-07,
"loss": 0.5739,
"step": 12652
},
{
"epoch": 0.8,
"grad_norm": 0.9218218326568604,
"learning_rate": 9.969305013360825e-07,
"loss": 0.5615,
"step": 12653
},
{
"epoch": 0.8,
"grad_norm": 0.9229342341423035,
"learning_rate": 9.963158301149522e-07,
"loss": 0.6138,
"step": 12654
},
{
"epoch": 0.8,
"grad_norm": 0.9228758215904236,
"learning_rate": 9.957013274750338e-07,
"loss": 0.6017,
"step": 12655
},
{
"epoch": 0.8,
"grad_norm": 0.8756922483444214,
"learning_rate": 9.95086993442203e-07,
"loss": 0.577,
"step": 12656
},
{
"epoch": 0.8,
"grad_norm": 0.9407891035079956,
"learning_rate": 9.944728280423265e-07,
"loss": 0.6189,
"step": 12657
},
{
"epoch": 0.8,
"grad_norm": 0.8400014042854309,
"learning_rate": 9.938588313012655e-07,
"loss": 0.5349,
"step": 12658
},
{
"epoch": 0.8,
"grad_norm": 0.9128040671348572,
"learning_rate": 9.93245003244872e-07,
"loss": 0.5806,
"step": 12659
},
{
"epoch": 0.8,
"grad_norm": 0.9192377328872681,
"learning_rate": 9.92631343898995e-07,
"loss": 0.5908,
"step": 12660
},
{
"epoch": 0.8,
"grad_norm": 0.9443216919898987,
"learning_rate": 9.920178532894698e-07,
"loss": 0.5576,
"step": 12661
},
{
"epoch": 0.8,
"grad_norm": 0.8419626355171204,
"learning_rate": 9.9140453144213e-07,
"loss": 0.5135,
"step": 12662
},
{
"epoch": 0.8,
"grad_norm": 0.8705309629440308,
"learning_rate": 9.907913783828004e-07,
"loss": 0.5648,
"step": 12663
},
{
"epoch": 0.8,
"grad_norm": 0.9433914422988892,
"learning_rate": 9.901783941372988e-07,
"loss": 0.5512,
"step": 12664
},
{
"epoch": 0.8,
"grad_norm": 0.9095032811164856,
"learning_rate": 9.895655787314361e-07,
"loss": 0.6271,
"step": 12665
},
{
"epoch": 0.8,
"grad_norm": 0.8473713994026184,
"learning_rate": 9.889529321910169e-07,
"loss": 0.5568,
"step": 12666
},
{
"epoch": 0.8,
"grad_norm": 0.8689250349998474,
"learning_rate": 9.88340454541834e-07,
"loss": 0.5617,
"step": 12667
},
{
"epoch": 0.8,
"grad_norm": 0.94936603307724,
"learning_rate": 9.87728145809681e-07,
"loss": 0.6084,
"step": 12668
},
{
"epoch": 0.8,
"grad_norm": 0.8550971150398254,
"learning_rate": 9.871160060203371e-07,
"loss": 0.485,
"step": 12669
},
{
"epoch": 0.8,
"grad_norm": 0.8166051506996155,
"learning_rate": 9.865040351995787e-07,
"loss": 0.5548,
"step": 12670
},
{
"epoch": 0.8,
"grad_norm": 0.8855223655700684,
"learning_rate": 9.85892233373173e-07,
"loss": 0.5517,
"step": 12671
},
{
"epoch": 0.8,
"grad_norm": 0.8748183846473694,
"learning_rate": 9.852806005668813e-07,
"loss": 0.5437,
"step": 12672
},
{
"epoch": 0.8,
"grad_norm": 0.9316419959068298,
"learning_rate": 9.846691368064577e-07,
"loss": 0.5686,
"step": 12673
},
{
"epoch": 0.8,
"grad_norm": 0.8692405819892883,
"learning_rate": 9.840578421176495e-07,
"loss": 0.5458,
"step": 12674
},
{
"epoch": 0.8,
"grad_norm": 0.9151699542999268,
"learning_rate": 9.834467165261924e-07,
"loss": 0.5581,
"step": 12675
},
{
"epoch": 0.8,
"grad_norm": 0.8994660973548889,
"learning_rate": 9.828357600578242e-07,
"loss": 0.5499,
"step": 12676
},
{
"epoch": 0.8,
"grad_norm": 0.9051674008369446,
"learning_rate": 9.82224972738266e-07,
"loss": 0.6041,
"step": 12677
},
{
"epoch": 0.8,
"grad_norm": 0.845827043056488,
"learning_rate": 9.816143545932378e-07,
"loss": 0.5242,
"step": 12678
},
{
"epoch": 0.8,
"grad_norm": 0.8907935619354248,
"learning_rate": 9.8100390564845e-07,
"loss": 0.5491,
"step": 12679
},
{
"epoch": 0.8,
"grad_norm": 0.833772599697113,
"learning_rate": 9.803936259296066e-07,
"loss": 0.5004,
"step": 12680
},
{
"epoch": 0.8,
"grad_norm": 0.9332374930381775,
"learning_rate": 9.797835154624041e-07,
"loss": 0.6143,
"step": 12681
},
{
"epoch": 0.8,
"grad_norm": 0.906049370765686,
"learning_rate": 9.791735742725339e-07,
"loss": 0.641,
"step": 12682
},
{
"epoch": 0.8,
"grad_norm": 0.9151736497879028,
"learning_rate": 9.78563802385676e-07,
"loss": 0.533,
"step": 12683
},
{
"epoch": 0.8,
"grad_norm": 0.8841385841369629,
"learning_rate": 9.779541998275067e-07,
"loss": 0.5605,
"step": 12684
},
{
"epoch": 0.8,
"grad_norm": 0.8864476084709167,
"learning_rate": 9.773447666236946e-07,
"loss": 0.5398,
"step": 12685
},
{
"epoch": 0.8,
"grad_norm": 0.8535383343696594,
"learning_rate": 9.767355027999004e-07,
"loss": 0.5665,
"step": 12686
},
{
"epoch": 0.8,
"grad_norm": 0.8663592338562012,
"learning_rate": 9.761264083817795e-07,
"loss": 0.5445,
"step": 12687
},
{
"epoch": 0.8,
"grad_norm": 0.9647719860076904,
"learning_rate": 9.755174833949749e-07,
"loss": 0.528,
"step": 12688
},
{
"epoch": 0.8,
"grad_norm": 0.9407845139503479,
"learning_rate": 9.749087278651304e-07,
"loss": 0.6414,
"step": 12689
},
{
"epoch": 0.8,
"grad_norm": 0.869473934173584,
"learning_rate": 9.743001418178782e-07,
"loss": 0.5956,
"step": 12690
},
{
"epoch": 0.8,
"grad_norm": 0.9082080125808716,
"learning_rate": 9.736917252788414e-07,
"loss": 0.5468,
"step": 12691
},
{
"epoch": 0.8,
"grad_norm": 0.8994290232658386,
"learning_rate": 9.730834782736393e-07,
"loss": 0.5714,
"step": 12692
},
{
"epoch": 0.8,
"grad_norm": 0.8263580799102783,
"learning_rate": 9.724754008278836e-07,
"loss": 0.5674,
"step": 12693
},
{
"epoch": 0.8,
"grad_norm": 0.8548535704612732,
"learning_rate": 9.718674929671778e-07,
"loss": 0.5221,
"step": 12694
},
{
"epoch": 0.8,
"grad_norm": 0.88187575340271,
"learning_rate": 9.71259754717121e-07,
"loss": 0.5945,
"step": 12695
},
{
"epoch": 0.8,
"grad_norm": 0.9128777384757996,
"learning_rate": 9.706521861032974e-07,
"loss": 0.5188,
"step": 12696
},
{
"epoch": 0.8,
"grad_norm": 0.9275169968605042,
"learning_rate": 9.700447871512953e-07,
"loss": 0.5871,
"step": 12697
},
{
"epoch": 0.8,
"grad_norm": 0.9131552577018738,
"learning_rate": 9.694375578866889e-07,
"loss": 0.6075,
"step": 12698
},
{
"epoch": 0.8,
"grad_norm": 0.9026870727539062,
"learning_rate": 9.688304983350443e-07,
"loss": 0.5856,
"step": 12699
},
{
"epoch": 0.8,
"grad_norm": 0.8732842803001404,
"learning_rate": 9.682236085219243e-07,
"loss": 0.5447,
"step": 12700
},
{
"epoch": 0.8,
"grad_norm": 0.9150475859642029,
"learning_rate": 9.67616888472882e-07,
"loss": 0.5613,
"step": 12701
},
{
"epoch": 0.8,
"grad_norm": 0.8680015802383423,
"learning_rate": 9.670103382134655e-07,
"loss": 0.5447,
"step": 12702
},
{
"epoch": 0.8,
"grad_norm": 0.9147303700447083,
"learning_rate": 9.664039577692152e-07,
"loss": 0.5829,
"step": 12703
},
{
"epoch": 0.8,
"grad_norm": 0.9100850224494934,
"learning_rate": 9.65797747165661e-07,
"loss": 0.5835,
"step": 12704
},
{
"epoch": 0.8,
"grad_norm": 0.8714893460273743,
"learning_rate": 9.65191706428328e-07,
"loss": 0.5811,
"step": 12705
},
{
"epoch": 0.8,
"grad_norm": 0.8966681361198425,
"learning_rate": 9.645858355827392e-07,
"loss": 0.5887,
"step": 12706
},
{
"epoch": 0.81,
"grad_norm": 0.8519495725631714,
"learning_rate": 9.639801346544015e-07,
"loss": 0.5868,
"step": 12707
},
{
"epoch": 0.81,
"grad_norm": 0.9009888768196106,
"learning_rate": 9.633746036688196e-07,
"loss": 0.5386,
"step": 12708
},
{
"epoch": 0.81,
"grad_norm": 0.8464866280555725,
"learning_rate": 9.627692426514907e-07,
"loss": 0.5542,
"step": 12709
},
{
"epoch": 0.81,
"grad_norm": 0.9193606972694397,
"learning_rate": 9.621640516279047e-07,
"loss": 0.5917,
"step": 12710
},
{
"epoch": 0.81,
"grad_norm": 0.9212644100189209,
"learning_rate": 9.61559030623545e-07,
"loss": 0.6036,
"step": 12711
},
{
"epoch": 0.81,
"grad_norm": 0.9172996878623962,
"learning_rate": 9.609541796638848e-07,
"loss": 0.5267,
"step": 12712
},
{
"epoch": 0.81,
"grad_norm": 0.8999651074409485,
"learning_rate": 9.603494987743932e-07,
"loss": 0.5834,
"step": 12713
},
{
"epoch": 0.81,
"grad_norm": 0.9378758072853088,
"learning_rate": 9.597449879805314e-07,
"loss": 0.5732,
"step": 12714
},
{
"epoch": 0.81,
"grad_norm": 0.8777378797531128,
"learning_rate": 9.59140647307753e-07,
"loss": 0.5429,
"step": 12715
},
{
"epoch": 0.81,
"grad_norm": 0.9323769807815552,
"learning_rate": 9.585364767815048e-07,
"loss": 0.5651,
"step": 12716
},
{
"epoch": 0.81,
"grad_norm": 0.9203583598136902,
"learning_rate": 9.57932476427228e-07,
"loss": 0.6104,
"step": 12717
},
{
"epoch": 0.81,
"grad_norm": 0.8466483354568481,
"learning_rate": 9.573286462703501e-07,
"loss": 0.5486,
"step": 12718
},
{
"epoch": 0.81,
"grad_norm": 0.9139605164527893,
"learning_rate": 9.567249863363027e-07,
"loss": 0.5501,
"step": 12719
},
{
"epoch": 0.81,
"grad_norm": 0.9106989502906799,
"learning_rate": 9.56121496650499e-07,
"loss": 0.5969,
"step": 12720
},
{
"epoch": 0.81,
"grad_norm": 0.8454228043556213,
"learning_rate": 9.55518177238351e-07,
"loss": 0.5935,
"step": 12721
},
{
"epoch": 0.81,
"grad_norm": 0.8739069700241089,
"learning_rate": 9.549150281252633e-07,
"loss": 0.599,
"step": 12722
},
{
"epoch": 0.81,
"grad_norm": 0.8933126926422119,
"learning_rate": 9.54312049336632e-07,
"loss": 0.5864,
"step": 12723
},
{
"epoch": 0.81,
"grad_norm": 0.8254374861717224,
"learning_rate": 9.53709240897846e-07,
"loss": 0.5309,
"step": 12724
},
{
"epoch": 0.81,
"grad_norm": 0.8132497668266296,
"learning_rate": 9.531066028342895e-07,
"loss": 0.6107,
"step": 12725
},
{
"epoch": 0.81,
"grad_norm": 0.870490312576294,
"learning_rate": 9.525041351713332e-07,
"loss": 0.5699,
"step": 12726
},
{
"epoch": 0.81,
"grad_norm": 0.8581969141960144,
"learning_rate": 9.519018379343486e-07,
"loss": 0.5235,
"step": 12727
},
{
"epoch": 0.81,
"grad_norm": 0.8589658141136169,
"learning_rate": 9.512997111486965e-07,
"loss": 0.6124,
"step": 12728
},
{
"epoch": 0.81,
"grad_norm": 0.885682225227356,
"learning_rate": 9.506977548397284e-07,
"loss": 0.6406,
"step": 12729
},
{
"epoch": 0.81,
"grad_norm": 0.8724113702774048,
"learning_rate": 9.50095969032791e-07,
"loss": 0.6235,
"step": 12730
},
{
"epoch": 0.81,
"grad_norm": 0.9281406998634338,
"learning_rate": 9.494943537532242e-07,
"loss": 0.5744,
"step": 12731
},
{
"epoch": 0.81,
"grad_norm": 0.8291860222816467,
"learning_rate": 9.488929090263588e-07,
"loss": 0.5355,
"step": 12732
},
{
"epoch": 0.81,
"grad_norm": 0.8633788228034973,
"learning_rate": 9.482916348775217e-07,
"loss": 0.5948,
"step": 12733
},
{
"epoch": 0.81,
"grad_norm": 0.9064257740974426,
"learning_rate": 9.476905313320283e-07,
"loss": 0.627,
"step": 12734
},
{
"epoch": 0.81,
"grad_norm": 0.9185280203819275,
"learning_rate": 9.470895984151879e-07,
"loss": 0.5504,
"step": 12735
},
{
"epoch": 0.81,
"grad_norm": 0.9063805937767029,
"learning_rate": 9.464888361523078e-07,
"loss": 0.5554,
"step": 12736
},
{
"epoch": 0.81,
"grad_norm": 0.9305859804153442,
"learning_rate": 9.458882445686807e-07,
"loss": 0.6012,
"step": 12737
},
{
"epoch": 0.81,
"grad_norm": 0.9028577208518982,
"learning_rate": 9.452878236895963e-07,
"loss": 0.6199,
"step": 12738
},
{
"epoch": 0.81,
"grad_norm": 0.870011568069458,
"learning_rate": 9.446875735403366e-07,
"loss": 0.5725,
"step": 12739
},
{
"epoch": 0.81,
"grad_norm": 0.8897619247436523,
"learning_rate": 9.440874941461753e-07,
"loss": 0.5679,
"step": 12740
},
{
"epoch": 0.81,
"grad_norm": 0.8889445662498474,
"learning_rate": 9.434875855323816e-07,
"loss": 0.5471,
"step": 12741
},
{
"epoch": 0.81,
"grad_norm": 0.8454013466835022,
"learning_rate": 9.428878477242131e-07,
"loss": 0.5971,
"step": 12742
},
{
"epoch": 0.81,
"grad_norm": 0.911864161491394,
"learning_rate": 9.422882807469219e-07,
"loss": 0.536,
"step": 12743
},
{
"epoch": 0.81,
"grad_norm": 0.9062489867210388,
"learning_rate": 9.416888846257588e-07,
"loss": 0.5738,
"step": 12744
},
{
"epoch": 0.81,
"grad_norm": 0.8988074660301208,
"learning_rate": 9.41089659385957e-07,
"loss": 0.5907,
"step": 12745
},
{
"epoch": 0.81,
"grad_norm": 0.9005908370018005,
"learning_rate": 9.404906050527496e-07,
"loss": 0.5977,
"step": 12746
},
{
"epoch": 0.81,
"grad_norm": 0.8807809352874756,
"learning_rate": 9.398917216513625e-07,
"loss": 0.5498,
"step": 12747
},
{
"epoch": 0.81,
"grad_norm": 1.0026898384094238,
"learning_rate": 9.39293009207008e-07,
"loss": 0.6227,
"step": 12748
},
{
"epoch": 0.81,
"grad_norm": 0.9482130408287048,
"learning_rate": 9.386944677449017e-07,
"loss": 0.5968,
"step": 12749
},
{
"epoch": 0.81,
"grad_norm": 0.8382649421691895,
"learning_rate": 9.380960972902414e-07,
"loss": 0.5151,
"step": 12750
},
{
"epoch": 0.81,
"grad_norm": 0.8774755597114563,
"learning_rate": 9.374978978682248e-07,
"loss": 0.558,
"step": 12751
},
{
"epoch": 0.81,
"grad_norm": 1.0281962156295776,
"learning_rate": 9.368998695040387e-07,
"loss": 0.6242,
"step": 12752
},
{
"epoch": 0.81,
"grad_norm": 0.8988599181175232,
"learning_rate": 9.363020122228645e-07,
"loss": 0.5831,
"step": 12753
},
{
"epoch": 0.81,
"grad_norm": 0.9397704005241394,
"learning_rate": 9.357043260498766e-07,
"loss": 0.5837,
"step": 12754
},
{
"epoch": 0.81,
"grad_norm": 0.851276695728302,
"learning_rate": 9.351068110102418e-07,
"loss": 0.5161,
"step": 12755
},
{
"epoch": 0.81,
"grad_norm": 0.9624939560890198,
"learning_rate": 9.345094671291155e-07,
"loss": 0.5814,
"step": 12756
},
{
"epoch": 0.81,
"grad_norm": 0.9801254868507385,
"learning_rate": 9.339122944316559e-07,
"loss": 0.6233,
"step": 12757
},
{
"epoch": 0.81,
"grad_norm": 0.8699768781661987,
"learning_rate": 9.333152929430029e-07,
"loss": 0.5727,
"step": 12758
},
{
"epoch": 0.81,
"grad_norm": 0.9153022766113281,
"learning_rate": 9.327184626882963e-07,
"loss": 0.6218,
"step": 12759
},
{
"epoch": 0.81,
"grad_norm": 0.8925560712814331,
"learning_rate": 9.321218036926677e-07,
"loss": 0.5697,
"step": 12760
},
{
"epoch": 0.81,
"grad_norm": 0.8836098313331604,
"learning_rate": 9.315253159812359e-07,
"loss": 0.5824,
"step": 12761
},
{
"epoch": 0.81,
"grad_norm": 0.895380437374115,
"learning_rate": 9.30928999579121e-07,
"loss": 0.5481,
"step": 12762
},
{
"epoch": 0.81,
"grad_norm": 0.9610360264778137,
"learning_rate": 9.303328545114321e-07,
"loss": 0.532,
"step": 12763
},
{
"epoch": 0.81,
"grad_norm": 0.9137628078460693,
"learning_rate": 9.29736880803268e-07,
"loss": 0.53,
"step": 12764
},
{
"epoch": 0.81,
"grad_norm": 0.9747650623321533,
"learning_rate": 9.29141078479725e-07,
"loss": 0.6109,
"step": 12765
},
{
"epoch": 0.81,
"grad_norm": 0.9045560956001282,
"learning_rate": 9.285454475658889e-07,
"loss": 0.5683,
"step": 12766
},
{
"epoch": 0.81,
"grad_norm": 0.8270063400268555,
"learning_rate": 9.279499880868409e-07,
"loss": 0.5004,
"step": 12767
},
{
"epoch": 0.81,
"grad_norm": 0.8460723757743835,
"learning_rate": 9.273547000676547e-07,
"loss": 0.599,
"step": 12768
},
{
"epoch": 0.81,
"grad_norm": 0.9193210601806641,
"learning_rate": 9.267595835333915e-07,
"loss": 0.5865,
"step": 12769
},
{
"epoch": 0.81,
"grad_norm": 0.910054087638855,
"learning_rate": 9.261646385091139e-07,
"loss": 0.543,
"step": 12770
},
{
"epoch": 0.81,
"grad_norm": 0.8721626400947571,
"learning_rate": 9.25569865019873e-07,
"loss": 0.5724,
"step": 12771
},
{
"epoch": 0.81,
"grad_norm": 0.9765549302101135,
"learning_rate": 9.249752630907094e-07,
"loss": 0.5772,
"step": 12772
},
{
"epoch": 0.81,
"grad_norm": 0.8811758756637573,
"learning_rate": 9.243808327466619e-07,
"loss": 0.5403,
"step": 12773
},
{
"epoch": 0.81,
"grad_norm": 0.8623104095458984,
"learning_rate": 9.237865740127594e-07,
"loss": 0.5399,
"step": 12774
},
{
"epoch": 0.81,
"grad_norm": 0.8554300665855408,
"learning_rate": 9.231924869140241e-07,
"loss": 0.5435,
"step": 12775
},
{
"epoch": 0.81,
"grad_norm": 0.8593326210975647,
"learning_rate": 9.225985714754721e-07,
"loss": 0.5547,
"step": 12776
},
{
"epoch": 0.81,
"grad_norm": 0.9124411344528198,
"learning_rate": 9.220048277221089e-07,
"loss": 0.5711,
"step": 12777
},
{
"epoch": 0.81,
"grad_norm": 0.9051636457443237,
"learning_rate": 9.214112556789345e-07,
"loss": 0.5853,
"step": 12778
},
{
"epoch": 0.81,
"grad_norm": 0.877150297164917,
"learning_rate": 9.208178553709468e-07,
"loss": 0.5834,
"step": 12779
},
{
"epoch": 0.81,
"grad_norm": 0.9467854499816895,
"learning_rate": 9.202246268231274e-07,
"loss": 0.6122,
"step": 12780
},
{
"epoch": 0.81,
"grad_norm": 0.9263243079185486,
"learning_rate": 9.196315700604564e-07,
"loss": 0.635,
"step": 12781
},
{
"epoch": 0.81,
"grad_norm": 0.8572517037391663,
"learning_rate": 9.190386851079053e-07,
"loss": 0.5614,
"step": 12782
},
{
"epoch": 0.81,
"grad_norm": 0.8789429664611816,
"learning_rate": 9.184459719904388e-07,
"loss": 0.5525,
"step": 12783
},
{
"epoch": 0.81,
"grad_norm": 0.8996034264564514,
"learning_rate": 9.178534307330145e-07,
"loss": 0.5553,
"step": 12784
},
{
"epoch": 0.81,
"grad_norm": 0.8926593661308289,
"learning_rate": 9.17261061360581e-07,
"loss": 0.603,
"step": 12785
},
{
"epoch": 0.81,
"grad_norm": 0.9206883311271667,
"learning_rate": 9.166688638980791e-07,
"loss": 0.5725,
"step": 12786
},
{
"epoch": 0.81,
"grad_norm": 0.8996316194534302,
"learning_rate": 9.160768383704499e-07,
"loss": 0.5316,
"step": 12787
},
{
"epoch": 0.81,
"grad_norm": 0.8590518236160278,
"learning_rate": 9.154849848026165e-07,
"loss": 0.5715,
"step": 12788
},
{
"epoch": 0.81,
"grad_norm": 0.8883064389228821,
"learning_rate": 9.148933032195013e-07,
"loss": 0.5745,
"step": 12789
},
{
"epoch": 0.81,
"grad_norm": 0.9011886119842529,
"learning_rate": 9.14301793646018e-07,
"loss": 0.5894,
"step": 12790
},
{
"epoch": 0.81,
"grad_norm": 0.8296880722045898,
"learning_rate": 9.137104561070736e-07,
"loss": 0.5376,
"step": 12791
},
{
"epoch": 0.81,
"grad_norm": 0.8713788986206055,
"learning_rate": 9.13119290627566e-07,
"loss": 0.5824,
"step": 12792
},
{
"epoch": 0.81,
"grad_norm": 0.8919610977172852,
"learning_rate": 9.125282972323895e-07,
"loss": 0.5717,
"step": 12793
},
{
"epoch": 0.81,
"grad_norm": 0.9087851643562317,
"learning_rate": 9.119374759464261e-07,
"loss": 0.5855,
"step": 12794
},
{
"epoch": 0.81,
"grad_norm": 0.8336042165756226,
"learning_rate": 9.113468267945541e-07,
"loss": 0.5096,
"step": 12795
},
{
"epoch": 0.81,
"grad_norm": 0.8984754681587219,
"learning_rate": 9.107563498016436e-07,
"loss": 0.6249,
"step": 12796
},
{
"epoch": 0.81,
"grad_norm": 0.9276543855667114,
"learning_rate": 9.101660449925576e-07,
"loss": 0.6166,
"step": 12797
},
{
"epoch": 0.81,
"grad_norm": 0.9266611933708191,
"learning_rate": 9.095759123921538e-07,
"loss": 0.5569,
"step": 12798
},
{
"epoch": 0.81,
"grad_norm": 0.8445834517478943,
"learning_rate": 9.089859520252759e-07,
"loss": 0.536,
"step": 12799
},
{
"epoch": 0.81,
"grad_norm": 1.0016990900039673,
"learning_rate": 9.083961639167693e-07,
"loss": 0.624,
"step": 12800
},
{
"epoch": 0.81,
"grad_norm": 0.9784378409385681,
"learning_rate": 9.078065480914678e-07,
"loss": 0.6467,
"step": 12801
},
{
"epoch": 0.81,
"grad_norm": 0.9219988584518433,
"learning_rate": 9.072171045741957e-07,
"loss": 0.5891,
"step": 12802
},
{
"epoch": 0.81,
"grad_norm": 0.9053341150283813,
"learning_rate": 9.066278333897732e-07,
"loss": 0.6182,
"step": 12803
},
{
"epoch": 0.81,
"grad_norm": 0.9229487776756287,
"learning_rate": 9.060387345630134e-07,
"loss": 0.5547,
"step": 12804
},
{
"epoch": 0.81,
"grad_norm": 0.8746492266654968,
"learning_rate": 9.054498081187202e-07,
"loss": 0.5368,
"step": 12805
},
{
"epoch": 0.81,
"grad_norm": 0.9092094898223877,
"learning_rate": 9.048610540816932e-07,
"loss": 0.5661,
"step": 12806
},
{
"epoch": 0.81,
"grad_norm": 0.8250091671943665,
"learning_rate": 9.042724724767199e-07,
"loss": 0.5977,
"step": 12807
},
{
"epoch": 0.81,
"grad_norm": 0.856377363204956,
"learning_rate": 9.036840633285837e-07,
"loss": 0.5594,
"step": 12808
},
{
"epoch": 0.81,
"grad_norm": 0.9337197542190552,
"learning_rate": 9.030958266620637e-07,
"loss": 0.6058,
"step": 12809
},
{
"epoch": 0.81,
"grad_norm": 0.9406629204750061,
"learning_rate": 9.025077625019252e-07,
"loss": 0.5506,
"step": 12810
},
{
"epoch": 0.81,
"grad_norm": 0.8978514671325684,
"learning_rate": 9.01919870872931e-07,
"loss": 0.6085,
"step": 12811
},
{
"epoch": 0.81,
"grad_norm": 0.8674015998840332,
"learning_rate": 9.013321517998347e-07,
"loss": 0.5683,
"step": 12812
},
{
"epoch": 0.81,
"grad_norm": 0.94971764087677,
"learning_rate": 9.007446053073832e-07,
"loss": 0.6416,
"step": 12813
},
{
"epoch": 0.81,
"grad_norm": 0.836727499961853,
"learning_rate": 9.001572314203172e-07,
"loss": 0.5604,
"step": 12814
},
{
"epoch": 0.81,
"grad_norm": 0.9299215078353882,
"learning_rate": 8.99570030163367e-07,
"loss": 0.5895,
"step": 12815
},
{
"epoch": 0.81,
"grad_norm": 0.8771916031837463,
"learning_rate": 8.989830015612566e-07,
"loss": 0.5793,
"step": 12816
},
{
"epoch": 0.81,
"grad_norm": 0.8739469647407532,
"learning_rate": 8.983961456387086e-07,
"loss": 0.6095,
"step": 12817
},
{
"epoch": 0.81,
"grad_norm": 0.899440586566925,
"learning_rate": 8.978094624204292e-07,
"loss": 0.5347,
"step": 12818
},
{
"epoch": 0.81,
"grad_norm": 0.916681706905365,
"learning_rate": 8.972229519311227e-07,
"loss": 0.5969,
"step": 12819
},
{
"epoch": 0.81,
"grad_norm": 0.8973095417022705,
"learning_rate": 8.966366141954852e-07,
"loss": 0.6042,
"step": 12820
},
{
"epoch": 0.81,
"grad_norm": 0.9100470542907715,
"learning_rate": 8.960504492382055e-07,
"loss": 0.5789,
"step": 12821
},
{
"epoch": 0.81,
"grad_norm": 0.8430030345916748,
"learning_rate": 8.95464457083966e-07,
"loss": 0.5646,
"step": 12822
},
{
"epoch": 0.81,
"grad_norm": 0.869049072265625,
"learning_rate": 8.948786377574382e-07,
"loss": 0.5889,
"step": 12823
},
{
"epoch": 0.81,
"grad_norm": 0.8816308379173279,
"learning_rate": 8.942929912832904e-07,
"loss": 0.5535,
"step": 12824
},
{
"epoch": 0.81,
"grad_norm": 0.8329145908355713,
"learning_rate": 8.93707517686182e-07,
"loss": 0.5467,
"step": 12825
},
{
"epoch": 0.81,
"grad_norm": 0.8186325430870056,
"learning_rate": 8.93122216990765e-07,
"loss": 0.5437,
"step": 12826
},
{
"epoch": 0.81,
"grad_norm": 0.9899107813835144,
"learning_rate": 8.92537089221685e-07,
"loss": 0.6088,
"step": 12827
},
{
"epoch": 0.81,
"grad_norm": 0.8372784852981567,
"learning_rate": 8.919521344035808e-07,
"loss": 0.5536,
"step": 12828
},
{
"epoch": 0.81,
"grad_norm": 0.8933220505714417,
"learning_rate": 8.913673525610783e-07,
"loss": 0.5864,
"step": 12829
},
{
"epoch": 0.81,
"grad_norm": 0.8942568898200989,
"learning_rate": 8.907827437188065e-07,
"loss": 0.5824,
"step": 12830
},
{
"epoch": 0.81,
"grad_norm": 0.8576558232307434,
"learning_rate": 8.901983079013771e-07,
"loss": 0.5339,
"step": 12831
},
{
"epoch": 0.81,
"grad_norm": 0.892993152141571,
"learning_rate": 8.896140451334001e-07,
"loss": 0.5865,
"step": 12832
},
{
"epoch": 0.81,
"grad_norm": 0.854968786239624,
"learning_rate": 8.890299554394766e-07,
"loss": 0.5602,
"step": 12833
},
{
"epoch": 0.81,
"grad_norm": 0.8966131806373596,
"learning_rate": 8.884460388442006e-07,
"loss": 0.5696,
"step": 12834
},
{
"epoch": 0.81,
"grad_norm": 0.9169580936431885,
"learning_rate": 8.878622953721589e-07,
"loss": 0.5467,
"step": 12835
},
{
"epoch": 0.81,
"grad_norm": 0.867084801197052,
"learning_rate": 8.87278725047932e-07,
"loss": 0.5794,
"step": 12836
},
{
"epoch": 0.81,
"grad_norm": 0.9113507866859436,
"learning_rate": 8.866953278960888e-07,
"loss": 0.5922,
"step": 12837
},
{
"epoch": 0.81,
"grad_norm": 0.8957472443580627,
"learning_rate": 8.86112103941198e-07,
"loss": 0.5407,
"step": 12838
},
{
"epoch": 0.81,
"grad_norm": 0.8440329432487488,
"learning_rate": 8.855290532078148e-07,
"loss": 0.507,
"step": 12839
},
{
"epoch": 0.81,
"grad_norm": 0.8923792839050293,
"learning_rate": 8.849461757204897e-07,
"loss": 0.6195,
"step": 12840
},
{
"epoch": 0.81,
"grad_norm": 0.8741909861564636,
"learning_rate": 8.843634715037669e-07,
"loss": 0.5827,
"step": 12841
},
{
"epoch": 0.81,
"grad_norm": 0.8744585514068604,
"learning_rate": 8.83780940582179e-07,
"loss": 0.6142,
"step": 12842
},
{
"epoch": 0.81,
"grad_norm": 0.8400830030441284,
"learning_rate": 8.83198582980257e-07,
"loss": 0.5726,
"step": 12843
},
{
"epoch": 0.81,
"grad_norm": 0.9461512565612793,
"learning_rate": 8.826163987225233e-07,
"loss": 0.5863,
"step": 12844
},
{
"epoch": 0.81,
"grad_norm": 0.9131925106048584,
"learning_rate": 8.82034387833488e-07,
"loss": 0.5959,
"step": 12845
},
{
"epoch": 0.81,
"grad_norm": 0.9167430996894836,
"learning_rate": 8.814525503376597e-07,
"loss": 0.5696,
"step": 12846
},
{
"epoch": 0.81,
"grad_norm": 0.8956706523895264,
"learning_rate": 8.808708862595367e-07,
"loss": 0.561,
"step": 12847
},
{
"epoch": 0.81,
"grad_norm": 0.8876976370811462,
"learning_rate": 8.802893956236114e-07,
"loss": 0.5375,
"step": 12848
},
{
"epoch": 0.81,
"grad_norm": 0.9218643307685852,
"learning_rate": 8.797080784543699e-07,
"loss": 0.561,
"step": 12849
},
{
"epoch": 0.81,
"grad_norm": 0.9845806956291199,
"learning_rate": 8.791269347762849e-07,
"loss": 0.5885,
"step": 12850
},
{
"epoch": 0.81,
"grad_norm": 0.8306980729103088,
"learning_rate": 8.785459646138306e-07,
"loss": 0.5472,
"step": 12851
},
{
"epoch": 0.81,
"grad_norm": 0.867559015750885,
"learning_rate": 8.779651679914692e-07,
"loss": 0.5377,
"step": 12852
},
{
"epoch": 0.81,
"grad_norm": 0.8420113921165466,
"learning_rate": 8.773845449336537e-07,
"loss": 0.5777,
"step": 12853
},
{
"epoch": 0.81,
"grad_norm": 0.9076850414276123,
"learning_rate": 8.768040954648338e-07,
"loss": 0.6205,
"step": 12854
},
{
"epoch": 0.81,
"grad_norm": 0.8947234153747559,
"learning_rate": 8.762238196094502e-07,
"loss": 0.5319,
"step": 12855
},
{
"epoch": 0.81,
"grad_norm": 0.9484972357749939,
"learning_rate": 8.756437173919352e-07,
"loss": 0.5909,
"step": 12856
},
{
"epoch": 0.81,
"grad_norm": 0.8586333990097046,
"learning_rate": 8.750637888367164e-07,
"loss": 0.5382,
"step": 12857
},
{
"epoch": 0.81,
"grad_norm": 0.8808966875076294,
"learning_rate": 8.744840339682126e-07,
"loss": 0.5391,
"step": 12858
},
{
"epoch": 0.81,
"grad_norm": 0.8085102438926697,
"learning_rate": 8.73904452810832e-07,
"loss": 0.5483,
"step": 12859
},
{
"epoch": 0.81,
"grad_norm": 0.9202531576156616,
"learning_rate": 8.733250453889841e-07,
"loss": 0.5316,
"step": 12860
},
{
"epoch": 0.81,
"grad_norm": 0.907964289188385,
"learning_rate": 8.727458117270615e-07,
"loss": 0.5327,
"step": 12861
},
{
"epoch": 0.81,
"grad_norm": 0.9174656867980957,
"learning_rate": 8.721667518494553e-07,
"loss": 0.5938,
"step": 12862
},
{
"epoch": 0.81,
"grad_norm": 0.8944279551506042,
"learning_rate": 8.715878657805471e-07,
"loss": 0.5871,
"step": 12863
},
{
"epoch": 0.82,
"grad_norm": 0.8624773025512695,
"learning_rate": 8.710091535447123e-07,
"loss": 0.5784,
"step": 12864
},
{
"epoch": 0.82,
"grad_norm": 0.8640050888061523,
"learning_rate": 8.704306151663184e-07,
"loss": 0.536,
"step": 12865
},
{
"epoch": 0.82,
"grad_norm": 0.8937069177627563,
"learning_rate": 8.698522506697271e-07,
"loss": 0.5738,
"step": 12866
},
{
"epoch": 0.82,
"grad_norm": 0.8589310646057129,
"learning_rate": 8.692740600792871e-07,
"loss": 0.5897,
"step": 12867
},
{
"epoch": 0.82,
"grad_norm": 0.8561339378356934,
"learning_rate": 8.686960434193486e-07,
"loss": 0.5341,
"step": 12868
},
{
"epoch": 0.82,
"grad_norm": 0.9550122618675232,
"learning_rate": 8.681182007142475e-07,
"loss": 0.5872,
"step": 12869
},
{
"epoch": 0.82,
"grad_norm": 0.9209311008453369,
"learning_rate": 8.675405319883146e-07,
"loss": 0.5488,
"step": 12870
},
{
"epoch": 0.82,
"grad_norm": 0.9099619388580322,
"learning_rate": 8.66963037265876e-07,
"loss": 0.5861,
"step": 12871
},
{
"epoch": 0.82,
"grad_norm": 0.7935923337936401,
"learning_rate": 8.663857165712431e-07,
"loss": 0.504,
"step": 12872
},
{
"epoch": 0.82,
"grad_norm": 0.8865057229995728,
"learning_rate": 8.658085699287294e-07,
"loss": 0.5812,
"step": 12873
},
{
"epoch": 0.82,
"grad_norm": 0.8969137668609619,
"learning_rate": 8.652315973626362e-07,
"loss": 0.5974,
"step": 12874
},
{
"epoch": 0.82,
"grad_norm": 0.8373164534568787,
"learning_rate": 8.646547988972553e-07,
"loss": 0.5351,
"step": 12875
},
{
"epoch": 0.82,
"grad_norm": 0.8645594716072083,
"learning_rate": 8.64078174556875e-07,
"loss": 0.5746,
"step": 12876
},
{
"epoch": 0.82,
"grad_norm": 0.921709418296814,
"learning_rate": 8.635017243657751e-07,
"loss": 0.5987,
"step": 12877
},
{
"epoch": 0.82,
"grad_norm": 0.9052848815917969,
"learning_rate": 8.629254483482274e-07,
"loss": 0.5244,
"step": 12878
},
{
"epoch": 0.82,
"grad_norm": 0.8511383533477783,
"learning_rate": 8.623493465284987e-07,
"loss": 0.505,
"step": 12879
},
{
"epoch": 0.82,
"grad_norm": 0.893326997756958,
"learning_rate": 8.61773418930843e-07,
"loss": 0.5679,
"step": 12880
},
{
"epoch": 0.82,
"grad_norm": 0.9681572914123535,
"learning_rate": 8.611976655795135e-07,
"loss": 0.6534,
"step": 12881
},
{
"epoch": 0.82,
"grad_norm": 0.9120391607284546,
"learning_rate": 8.606220864987541e-07,
"loss": 0.5988,
"step": 12882
},
{
"epoch": 0.82,
"grad_norm": 0.8803929686546326,
"learning_rate": 8.600466817127972e-07,
"loss": 0.5486,
"step": 12883
},
{
"epoch": 0.82,
"grad_norm": 0.9291055798530579,
"learning_rate": 8.59471451245873e-07,
"loss": 0.5445,
"step": 12884
},
{
"epoch": 0.82,
"grad_norm": 0.8664212226867676,
"learning_rate": 8.588963951222024e-07,
"loss": 0.5972,
"step": 12885
},
{
"epoch": 0.82,
"grad_norm": 0.8658425211906433,
"learning_rate": 8.583215133659983e-07,
"loss": 0.6289,
"step": 12886
},
{
"epoch": 0.82,
"grad_norm": 0.8955614566802979,
"learning_rate": 8.577468060014688e-07,
"loss": 0.5883,
"step": 12887
},
{
"epoch": 0.82,
"grad_norm": 0.9566403031349182,
"learning_rate": 8.571722730528098e-07,
"loss": 0.6358,
"step": 12888
},
{
"epoch": 0.82,
"grad_norm": 0.8918949365615845,
"learning_rate": 8.565979145442138e-07,
"loss": 0.5786,
"step": 12889
},
{
"epoch": 0.82,
"grad_norm": 0.8985361456871033,
"learning_rate": 8.560237304998681e-07,
"loss": 0.5742,
"step": 12890
},
{
"epoch": 0.82,
"grad_norm": 0.9355623722076416,
"learning_rate": 8.554497209439461e-07,
"loss": 0.6009,
"step": 12891
},
{
"epoch": 0.82,
"grad_norm": 0.8992531299591064,
"learning_rate": 8.548758859006184e-07,
"loss": 0.5768,
"step": 12892
},
{
"epoch": 0.82,
"grad_norm": 0.9270733594894409,
"learning_rate": 8.543022253940475e-07,
"loss": 0.5583,
"step": 12893
},
{
"epoch": 0.82,
"grad_norm": 0.8681014180183411,
"learning_rate": 8.537287394483878e-07,
"loss": 0.5807,
"step": 12894
},
{
"epoch": 0.82,
"grad_norm": 0.9098723530769348,
"learning_rate": 8.531554280877885e-07,
"loss": 0.5598,
"step": 12895
},
{
"epoch": 0.82,
"grad_norm": 0.9069850444793701,
"learning_rate": 8.525822913363868e-07,
"loss": 0.6112,
"step": 12896
},
{
"epoch": 0.82,
"grad_norm": 0.8686051368713379,
"learning_rate": 8.520093292183163e-07,
"loss": 0.5605,
"step": 12897
},
{
"epoch": 0.82,
"grad_norm": 0.9454940557479858,
"learning_rate": 8.514365417577048e-07,
"loss": 0.5387,
"step": 12898
},
{
"epoch": 0.82,
"grad_norm": 0.8750715851783752,
"learning_rate": 8.50863928978668e-07,
"loss": 0.5455,
"step": 12899
},
{
"epoch": 0.82,
"grad_norm": 0.9609119892120361,
"learning_rate": 8.502914909053173e-07,
"loss": 0.5859,
"step": 12900
},
{
"epoch": 0.82,
"grad_norm": 0.8676950931549072,
"learning_rate": 8.497192275617577e-07,
"loss": 0.5496,
"step": 12901
},
{
"epoch": 0.82,
"grad_norm": 0.8623301386833191,
"learning_rate": 8.491471389720807e-07,
"loss": 0.6052,
"step": 12902
},
{
"epoch": 0.82,
"grad_norm": 0.9404549598693848,
"learning_rate": 8.485752251603807e-07,
"loss": 0.5788,
"step": 12903
},
{
"epoch": 0.82,
"grad_norm": 0.9115918278694153,
"learning_rate": 8.480034861507347e-07,
"loss": 0.5342,
"step": 12904
},
{
"epoch": 0.82,
"grad_norm": 0.8364629149436951,
"learning_rate": 8.474319219672183e-07,
"loss": 0.5695,
"step": 12905
},
{
"epoch": 0.82,
"grad_norm": 0.8777880072593689,
"learning_rate": 8.46860532633898e-07,
"loss": 0.5487,
"step": 12906
},
{
"epoch": 0.82,
"grad_norm": 0.9011834263801575,
"learning_rate": 8.462893181748327e-07,
"loss": 0.5618,
"step": 12907
},
{
"epoch": 0.82,
"grad_norm": 0.8608363270759583,
"learning_rate": 8.457182786140744e-07,
"loss": 0.5919,
"step": 12908
},
{
"epoch": 0.82,
"grad_norm": 0.9006455540657043,
"learning_rate": 8.451474139756693e-07,
"loss": 0.6024,
"step": 12909
},
{
"epoch": 0.82,
"grad_norm": 0.8328776955604553,
"learning_rate": 8.445767242836506e-07,
"loss": 0.5455,
"step": 12910
},
{
"epoch": 0.82,
"grad_norm": 0.859550416469574,
"learning_rate": 8.440062095620527e-07,
"loss": 0.5565,
"step": 12911
},
{
"epoch": 0.82,
"grad_norm": 0.8993778824806213,
"learning_rate": 8.434358698348944e-07,
"loss": 0.598,
"step": 12912
},
{
"epoch": 0.82,
"grad_norm": 0.8660597205162048,
"learning_rate": 8.428657051261918e-07,
"loss": 0.5732,
"step": 12913
},
{
"epoch": 0.82,
"grad_norm": 0.8744674324989319,
"learning_rate": 8.422957154599526e-07,
"loss": 0.5754,
"step": 12914
},
{
"epoch": 0.82,
"grad_norm": 0.9497204422950745,
"learning_rate": 8.417259008601775e-07,
"loss": 0.5412,
"step": 12915
},
{
"epoch": 0.82,
"grad_norm": 0.8864256739616394,
"learning_rate": 8.411562613508595e-07,
"loss": 0.5603,
"step": 12916
},
{
"epoch": 0.82,
"grad_norm": 0.959272563457489,
"learning_rate": 8.405867969559845e-07,
"loss": 0.5884,
"step": 12917
},
{
"epoch": 0.82,
"grad_norm": 0.8853299021720886,
"learning_rate": 8.400175076995287e-07,
"loss": 0.5456,
"step": 12918
},
{
"epoch": 0.82,
"grad_norm": 0.8390821218490601,
"learning_rate": 8.394483936054643e-07,
"loss": 0.5739,
"step": 12919
},
{
"epoch": 0.82,
"grad_norm": 0.8850178122520447,
"learning_rate": 8.388794546977546e-07,
"loss": 0.5718,
"step": 12920
},
{
"epoch": 0.82,
"grad_norm": 0.9476692080497742,
"learning_rate": 8.383106910003552e-07,
"loss": 0.5619,
"step": 12921
},
{
"epoch": 0.82,
"grad_norm": 0.9127770066261292,
"learning_rate": 8.377421025372157e-07,
"loss": 0.5741,
"step": 12922
},
{
"epoch": 0.82,
"grad_norm": 0.8317306041717529,
"learning_rate": 8.371736893322763e-07,
"loss": 0.4702,
"step": 12923
},
{
"epoch": 0.82,
"grad_norm": 0.87800532579422,
"learning_rate": 8.366054514094718e-07,
"loss": 0.5637,
"step": 12924
},
{
"epoch": 0.82,
"grad_norm": 0.8989687561988831,
"learning_rate": 8.360373887927298e-07,
"loss": 0.5926,
"step": 12925
},
{
"epoch": 0.82,
"grad_norm": 0.9448102712631226,
"learning_rate": 8.35469501505966e-07,
"loss": 0.5936,
"step": 12926
},
{
"epoch": 0.82,
"grad_norm": 0.8820131421089172,
"learning_rate": 8.349017895730948e-07,
"loss": 0.5735,
"step": 12927
},
{
"epoch": 0.82,
"grad_norm": 0.9099850654602051,
"learning_rate": 8.343342530180198e-07,
"loss": 0.5738,
"step": 12928
},
{
"epoch": 0.82,
"grad_norm": 0.9121573567390442,
"learning_rate": 8.33766891864638e-07,
"loss": 0.5523,
"step": 12929
},
{
"epoch": 0.82,
"grad_norm": 0.9185227155685425,
"learning_rate": 8.331997061368391e-07,
"loss": 0.5919,
"step": 12930
},
{
"epoch": 0.82,
"grad_norm": 0.8707922101020813,
"learning_rate": 8.326326958585062e-07,
"loss": 0.6125,
"step": 12931
},
{
"epoch": 0.82,
"grad_norm": 0.8843598365783691,
"learning_rate": 8.320658610535115e-07,
"loss": 0.5889,
"step": 12932
},
{
"epoch": 0.82,
"grad_norm": 0.903973400592804,
"learning_rate": 8.314992017457263e-07,
"loss": 0.5731,
"step": 12933
},
{
"epoch": 0.82,
"grad_norm": 0.8613129258155823,
"learning_rate": 8.30932717959007e-07,
"loss": 0.5461,
"step": 12934
},
{
"epoch": 0.82,
"grad_norm": 0.912260890007019,
"learning_rate": 8.303664097172087e-07,
"loss": 0.5855,
"step": 12935
},
{
"epoch": 0.82,
"grad_norm": 0.8741612434387207,
"learning_rate": 8.298002770441749e-07,
"loss": 0.5981,
"step": 12936
},
{
"epoch": 0.82,
"grad_norm": 0.8477001190185547,
"learning_rate": 8.292343199637448e-07,
"loss": 0.5833,
"step": 12937
},
{
"epoch": 0.82,
"grad_norm": 0.8845143914222717,
"learning_rate": 8.286685384997484e-07,
"loss": 0.5903,
"step": 12938
},
{
"epoch": 0.82,
"grad_norm": 0.933994472026825,
"learning_rate": 8.281029326760104e-07,
"loss": 0.5752,
"step": 12939
},
{
"epoch": 0.82,
"grad_norm": 0.8114098310470581,
"learning_rate": 8.275375025163418e-07,
"loss": 0.5763,
"step": 12940
},
{
"epoch": 0.82,
"grad_norm": 0.9245671033859253,
"learning_rate": 8.269722480445569e-07,
"loss": 0.6329,
"step": 12941
},
{
"epoch": 0.82,
"grad_norm": 0.9412350058555603,
"learning_rate": 8.264071692844527e-07,
"loss": 0.5522,
"step": 12942
},
{
"epoch": 0.82,
"grad_norm": 0.8871721625328064,
"learning_rate": 8.258422662598231e-07,
"loss": 0.5602,
"step": 12943
},
{
"epoch": 0.82,
"grad_norm": 0.9519109725952148,
"learning_rate": 8.252775389944556e-07,
"loss": 0.5704,
"step": 12944
},
{
"epoch": 0.82,
"grad_norm": 0.9257845282554626,
"learning_rate": 8.247129875121274e-07,
"loss": 0.6062,
"step": 12945
},
{
"epoch": 0.82,
"grad_norm": 0.9066646695137024,
"learning_rate": 8.24148611836611e-07,
"loss": 0.556,
"step": 12946
},
{
"epoch": 0.82,
"grad_norm": 0.8821330070495605,
"learning_rate": 8.235844119916708e-07,
"loss": 0.5509,
"step": 12947
},
{
"epoch": 0.82,
"grad_norm": 1.1398460865020752,
"learning_rate": 8.230203880010612e-07,
"loss": 0.588,
"step": 12948
},
{
"epoch": 0.82,
"grad_norm": 0.8532936573028564,
"learning_rate": 8.224565398885325e-07,
"loss": 0.5312,
"step": 12949
},
{
"epoch": 0.82,
"grad_norm": 0.8935076594352722,
"learning_rate": 8.218928676778264e-07,
"loss": 0.5871,
"step": 12950
},
{
"epoch": 0.82,
"grad_norm": 0.8617026209831238,
"learning_rate": 8.213293713926767e-07,
"loss": 0.5039,
"step": 12951
},
{
"epoch": 0.82,
"grad_norm": 0.9438952207565308,
"learning_rate": 8.207660510568122e-07,
"loss": 0.6125,
"step": 12952
},
{
"epoch": 0.82,
"grad_norm": 0.8180469274520874,
"learning_rate": 8.202029066939482e-07,
"loss": 0.5147,
"step": 12953
},
{
"epoch": 0.82,
"grad_norm": 0.8670182824134827,
"learning_rate": 8.196399383278004e-07,
"loss": 0.6175,
"step": 12954
},
{
"epoch": 0.82,
"grad_norm": 0.9703617691993713,
"learning_rate": 8.190771459820739e-07,
"loss": 0.6071,
"step": 12955
},
{
"epoch": 0.82,
"grad_norm": 0.9828335046768188,
"learning_rate": 8.18514529680463e-07,
"loss": 0.6214,
"step": 12956
},
{
"epoch": 0.82,
"grad_norm": 0.8318359851837158,
"learning_rate": 8.179520894466592e-07,
"loss": 0.5637,
"step": 12957
},
{
"epoch": 0.82,
"grad_norm": 0.8575620651245117,
"learning_rate": 8.173898253043444e-07,
"loss": 0.5122,
"step": 12958
},
{
"epoch": 0.82,
"grad_norm": 0.8485636115074158,
"learning_rate": 8.168277372771937e-07,
"loss": 0.5165,
"step": 12959
},
{
"epoch": 0.82,
"grad_norm": 0.8538296222686768,
"learning_rate": 8.162658253888761e-07,
"loss": 0.6073,
"step": 12960
},
{
"epoch": 0.82,
"grad_norm": 0.8725820779800415,
"learning_rate": 8.157040896630481e-07,
"loss": 0.5341,
"step": 12961
},
{
"epoch": 0.82,
"grad_norm": 0.855991780757904,
"learning_rate": 8.151425301233656e-07,
"loss": 0.5491,
"step": 12962
},
{
"epoch": 0.82,
"grad_norm": 0.9150635600090027,
"learning_rate": 8.14581146793475e-07,
"loss": 0.5929,
"step": 12963
},
{
"epoch": 0.82,
"grad_norm": 0.9065380692481995,
"learning_rate": 8.140199396970106e-07,
"loss": 0.5817,
"step": 12964
},
{
"epoch": 0.82,
"grad_norm": 0.8524861335754395,
"learning_rate": 8.13458908857605e-07,
"loss": 0.5101,
"step": 12965
},
{
"epoch": 0.82,
"grad_norm": 0.8974103331565857,
"learning_rate": 8.128980542988801e-07,
"loss": 0.5379,
"step": 12966
},
{
"epoch": 0.82,
"grad_norm": 0.8953040242195129,
"learning_rate": 8.12337376044453e-07,
"loss": 0.5447,
"step": 12967
},
{
"epoch": 0.82,
"grad_norm": 0.9523823261260986,
"learning_rate": 8.117768741179322e-07,
"loss": 0.6085,
"step": 12968
},
{
"epoch": 0.82,
"grad_norm": 0.8712965846061707,
"learning_rate": 8.112165485429163e-07,
"loss": 0.5753,
"step": 12969
},
{
"epoch": 0.82,
"grad_norm": 0.9363554120063782,
"learning_rate": 8.106563993429983e-07,
"loss": 0.5624,
"step": 12970
},
{
"epoch": 0.82,
"grad_norm": 0.8442745208740234,
"learning_rate": 8.100964265417682e-07,
"loss": 0.5491,
"step": 12971
},
{
"epoch": 0.82,
"grad_norm": 0.9169662594795227,
"learning_rate": 8.09536630162801e-07,
"loss": 0.5917,
"step": 12972
},
{
"epoch": 0.82,
"grad_norm": 0.948613166809082,
"learning_rate": 8.089770102296685e-07,
"loss": 0.5397,
"step": 12973
},
{
"epoch": 0.82,
"grad_norm": 0.8877300024032593,
"learning_rate": 8.084175667659345e-07,
"loss": 0.5818,
"step": 12974
},
{
"epoch": 0.82,
"grad_norm": 0.8682299852371216,
"learning_rate": 8.078582997951556e-07,
"loss": 0.5694,
"step": 12975
},
{
"epoch": 0.82,
"grad_norm": 0.8772991299629211,
"learning_rate": 8.072992093408816e-07,
"loss": 0.6045,
"step": 12976
},
{
"epoch": 0.82,
"grad_norm": 0.8861331343650818,
"learning_rate": 8.067402954266512e-07,
"loss": 0.6145,
"step": 12977
},
{
"epoch": 0.82,
"grad_norm": 0.8956562876701355,
"learning_rate": 8.061815580759996e-07,
"loss": 0.5567,
"step": 12978
},
{
"epoch": 0.82,
"grad_norm": 0.8872475624084473,
"learning_rate": 8.056229973124529e-07,
"loss": 0.5827,
"step": 12979
},
{
"epoch": 0.82,
"grad_norm": 0.8420911431312561,
"learning_rate": 8.050646131595313e-07,
"loss": 0.5742,
"step": 12980
},
{
"epoch": 0.82,
"grad_norm": 0.8587638735771179,
"learning_rate": 8.045064056407453e-07,
"loss": 0.5755,
"step": 12981
},
{
"epoch": 0.82,
"grad_norm": 0.8562715649604797,
"learning_rate": 8.039483747796012e-07,
"loss": 0.5786,
"step": 12982
},
{
"epoch": 0.82,
"grad_norm": 0.8843387365341187,
"learning_rate": 8.033905205995913e-07,
"loss": 0.5752,
"step": 12983
},
{
"epoch": 0.82,
"grad_norm": 0.8959712982177734,
"learning_rate": 8.0283284312421e-07,
"loss": 0.5532,
"step": 12984
},
{
"epoch": 0.82,
"grad_norm": 0.8698373436927795,
"learning_rate": 8.022753423769359e-07,
"loss": 0.587,
"step": 12985
},
{
"epoch": 0.82,
"grad_norm": 0.8483936190605164,
"learning_rate": 8.017180183812439e-07,
"loss": 0.5502,
"step": 12986
},
{
"epoch": 0.82,
"grad_norm": 0.866079568862915,
"learning_rate": 8.011608711606017e-07,
"loss": 0.5588,
"step": 12987
},
{
"epoch": 0.82,
"grad_norm": 0.8948245048522949,
"learning_rate": 8.006039007384681e-07,
"loss": 0.5838,
"step": 12988
},
{
"epoch": 0.82,
"grad_norm": 0.9978700876235962,
"learning_rate": 8.000471071382959e-07,
"loss": 0.6032,
"step": 12989
},
{
"epoch": 0.82,
"grad_norm": 0.8848072290420532,
"learning_rate": 7.99490490383531e-07,
"loss": 0.5889,
"step": 12990
},
{
"epoch": 0.82,
"grad_norm": 0.8934358954429626,
"learning_rate": 7.989340504976062e-07,
"loss": 0.5968,
"step": 12991
},
{
"epoch": 0.82,
"grad_norm": 0.8869682550430298,
"learning_rate": 7.983777875039567e-07,
"loss": 0.5398,
"step": 12992
},
{
"epoch": 0.82,
"grad_norm": 0.8653879165649414,
"learning_rate": 7.978217014260009e-07,
"loss": 0.5723,
"step": 12993
},
{
"epoch": 0.82,
"grad_norm": 0.9040364027023315,
"learning_rate": 7.972657922871546e-07,
"loss": 0.5593,
"step": 12994
},
{
"epoch": 0.82,
"grad_norm": 0.8811683058738708,
"learning_rate": 7.967100601108258e-07,
"loss": 0.577,
"step": 12995
},
{
"epoch": 0.82,
"grad_norm": 0.8992339968681335,
"learning_rate": 7.961545049204145e-07,
"loss": 0.634,
"step": 12996
},
{
"epoch": 0.82,
"grad_norm": 0.8207805156707764,
"learning_rate": 7.955991267393127e-07,
"loss": 0.5304,
"step": 12997
},
{
"epoch": 0.82,
"grad_norm": 0.9232082366943359,
"learning_rate": 7.950439255909065e-07,
"loss": 0.6293,
"step": 12998
},
{
"epoch": 0.82,
"grad_norm": 0.884673535823822,
"learning_rate": 7.944889014985718e-07,
"loss": 0.5888,
"step": 12999
},
{
"epoch": 0.82,
"grad_norm": 0.8796509504318237,
"learning_rate": 7.939340544856783e-07,
"loss": 0.5765,
"step": 13000
},
{
"epoch": 0.82,
"grad_norm": 0.8928359746932983,
"learning_rate": 7.933793845755922e-07,
"loss": 0.5899,
"step": 13001
},
{
"epoch": 0.82,
"grad_norm": 0.8858817219734192,
"learning_rate": 7.928248917916653e-07,
"loss": 0.573,
"step": 13002
},
{
"epoch": 0.82,
"grad_norm": 0.9019994735717773,
"learning_rate": 7.922705761572464e-07,
"loss": 0.5574,
"step": 13003
},
{
"epoch": 0.82,
"grad_norm": 0.8664145469665527,
"learning_rate": 7.91716437695676e-07,
"loss": 0.5113,
"step": 13004
},
{
"epoch": 0.82,
"grad_norm": 0.91963791847229,
"learning_rate": 7.911624764302872e-07,
"loss": 0.6429,
"step": 13005
},
{
"epoch": 0.82,
"grad_norm": 0.9109863042831421,
"learning_rate": 7.906086923844059e-07,
"loss": 0.5727,
"step": 13006
},
{
"epoch": 0.82,
"grad_norm": 0.863783597946167,
"learning_rate": 7.900550855813477e-07,
"loss": 0.5765,
"step": 13007
},
{
"epoch": 0.82,
"grad_norm": 0.9418416619300842,
"learning_rate": 7.895016560444241e-07,
"loss": 0.5862,
"step": 13008
},
{
"epoch": 0.82,
"grad_norm": 0.8956203460693359,
"learning_rate": 7.889484037969403e-07,
"loss": 0.6175,
"step": 13009
},
{
"epoch": 0.82,
"grad_norm": 0.8799732327461243,
"learning_rate": 7.883953288621887e-07,
"loss": 0.6195,
"step": 13010
},
{
"epoch": 0.82,
"grad_norm": 1.00110924243927,
"learning_rate": 7.878424312634592e-07,
"loss": 0.5845,
"step": 13011
},
{
"epoch": 0.82,
"grad_norm": 0.9354737401008606,
"learning_rate": 7.87289711024033e-07,
"loss": 0.592,
"step": 13012
},
{
"epoch": 0.82,
"grad_norm": 0.8658231496810913,
"learning_rate": 7.867371681671793e-07,
"loss": 0.6014,
"step": 13013
},
{
"epoch": 0.82,
"grad_norm": 0.9071126580238342,
"learning_rate": 7.861848027161694e-07,
"loss": 0.6201,
"step": 13014
},
{
"epoch": 0.82,
"grad_norm": 0.8745089769363403,
"learning_rate": 7.856326146942572e-07,
"loss": 0.5287,
"step": 13015
},
{
"epoch": 0.82,
"grad_norm": 0.890994131565094,
"learning_rate": 7.85080604124695e-07,
"loss": 0.634,
"step": 13016
},
{
"epoch": 0.82,
"grad_norm": 0.8491596579551697,
"learning_rate": 7.845287710307258e-07,
"loss": 0.5487,
"step": 13017
},
{
"epoch": 0.82,
"grad_norm": 0.942820131778717,
"learning_rate": 7.839771154355858e-07,
"loss": 0.5933,
"step": 13018
},
{
"epoch": 0.82,
"grad_norm": 0.940209686756134,
"learning_rate": 7.834256373625027e-07,
"loss": 0.5907,
"step": 13019
},
{
"epoch": 0.82,
"grad_norm": 0.8660345077514648,
"learning_rate": 7.828743368346991e-07,
"loss": 0.5164,
"step": 13020
},
{
"epoch": 0.82,
"grad_norm": 0.8865716457366943,
"learning_rate": 7.823232138753845e-07,
"loss": 0.5352,
"step": 13021
},
{
"epoch": 0.83,
"grad_norm": 0.9319779872894287,
"learning_rate": 7.817722685077689e-07,
"loss": 0.5374,
"step": 13022
},
{
"epoch": 0.83,
"grad_norm": 0.8646177649497986,
"learning_rate": 7.812215007550483e-07,
"loss": 0.5976,
"step": 13023
},
{
"epoch": 0.83,
"grad_norm": 0.9318941831588745,
"learning_rate": 7.806709106404142e-07,
"loss": 0.6182,
"step": 13024
},
{
"epoch": 0.83,
"grad_norm": 0.9168413281440735,
"learning_rate": 7.801204981870508e-07,
"loss": 0.5817,
"step": 13025
},
{
"epoch": 0.83,
"grad_norm": 0.8882789015769958,
"learning_rate": 7.795702634181318e-07,
"loss": 0.5534,
"step": 13026
},
{
"epoch": 0.83,
"grad_norm": 0.8667416572570801,
"learning_rate": 7.790202063568276e-07,
"loss": 0.5252,
"step": 13027
},
{
"epoch": 0.83,
"grad_norm": 0.8797557353973389,
"learning_rate": 7.784703270263006e-07,
"loss": 0.5719,
"step": 13028
},
{
"epoch": 0.83,
"grad_norm": 0.8629273176193237,
"learning_rate": 7.779206254497007e-07,
"loss": 0.5397,
"step": 13029
},
{
"epoch": 0.83,
"grad_norm": 0.9070542454719543,
"learning_rate": 7.773711016501762e-07,
"loss": 0.5972,
"step": 13030
},
{
"epoch": 0.83,
"grad_norm": 0.8951036930084229,
"learning_rate": 7.76821755650865e-07,
"loss": 0.6304,
"step": 13031
},
{
"epoch": 0.83,
"grad_norm": 0.9298555850982666,
"learning_rate": 7.762725874748983e-07,
"loss": 0.5728,
"step": 13032
},
{
"epoch": 0.83,
"grad_norm": 0.9324959516525269,
"learning_rate": 7.757235971454008e-07,
"loss": 0.5416,
"step": 13033
},
{
"epoch": 0.83,
"grad_norm": 0.8365843296051025,
"learning_rate": 7.751747846854851e-07,
"loss": 0.5546,
"step": 13034
},
{
"epoch": 0.83,
"grad_norm": 0.9446489810943604,
"learning_rate": 7.746261501182633e-07,
"loss": 0.5714,
"step": 13035
},
{
"epoch": 0.83,
"grad_norm": 0.8774089217185974,
"learning_rate": 7.740776934668365e-07,
"loss": 0.5605,
"step": 13036
},
{
"epoch": 0.83,
"grad_norm": 0.8851078152656555,
"learning_rate": 7.73529414754296e-07,
"loss": 0.5426,
"step": 13037
},
{
"epoch": 0.83,
"grad_norm": 0.9036283493041992,
"learning_rate": 7.72981314003729e-07,
"loss": 0.5378,
"step": 13038
},
{
"epoch": 0.83,
"grad_norm": 0.9143775701522827,
"learning_rate": 7.724333912382143e-07,
"loss": 0.5731,
"step": 13039
},
{
"epoch": 0.83,
"grad_norm": 0.8436862230300903,
"learning_rate": 7.718856464808222e-07,
"loss": 0.5392,
"step": 13040
},
{
"epoch": 0.83,
"grad_norm": 0.8102920055389404,
"learning_rate": 7.713380797546188e-07,
"loss": 0.5208,
"step": 13041
},
{
"epoch": 0.83,
"grad_norm": 0.922103762626648,
"learning_rate": 7.707906910826574e-07,
"loss": 0.5924,
"step": 13042
},
{
"epoch": 0.83,
"grad_norm": 0.8845114707946777,
"learning_rate": 7.702434804879861e-07,
"loss": 0.5718,
"step": 13043
},
{
"epoch": 0.83,
"grad_norm": 0.91489577293396,
"learning_rate": 7.696964479936497e-07,
"loss": 0.5519,
"step": 13044
},
{
"epoch": 0.83,
"grad_norm": 0.8923588991165161,
"learning_rate": 7.691495936226789e-07,
"loss": 0.5516,
"step": 13045
},
{
"epoch": 0.83,
"grad_norm": 1.006177544593811,
"learning_rate": 7.686029173981008e-07,
"loss": 0.6134,
"step": 13046
},
{
"epoch": 0.83,
"grad_norm": 0.9382014870643616,
"learning_rate": 7.680564193429336e-07,
"loss": 0.5919,
"step": 13047
},
{
"epoch": 0.83,
"grad_norm": 0.9472145438194275,
"learning_rate": 7.675100994801888e-07,
"loss": 0.5363,
"step": 13048
},
{
"epoch": 0.83,
"grad_norm": 0.8798018097877502,
"learning_rate": 7.669639578328713e-07,
"loss": 0.5514,
"step": 13049
},
{
"epoch": 0.83,
"grad_norm": 0.9217506647109985,
"learning_rate": 7.664179944239746e-07,
"loss": 0.5821,
"step": 13050
},
{
"epoch": 0.83,
"grad_norm": 0.8195998072624207,
"learning_rate": 7.658722092764876e-07,
"loss": 0.4924,
"step": 13051
},
{
"epoch": 0.83,
"grad_norm": 0.892219066619873,
"learning_rate": 7.653266024133943e-07,
"loss": 0.5429,
"step": 13052
},
{
"epoch": 0.83,
"grad_norm": 0.9027977585792542,
"learning_rate": 7.647811738576655e-07,
"loss": 0.5846,
"step": 13053
},
{
"epoch": 0.83,
"grad_norm": 0.8905366063117981,
"learning_rate": 7.642359236322683e-07,
"loss": 0.5554,
"step": 13054
},
{
"epoch": 0.83,
"grad_norm": 0.9175378680229187,
"learning_rate": 7.63690851760161e-07,
"loss": 0.5582,
"step": 13055
},
{
"epoch": 0.83,
"grad_norm": 0.945669412612915,
"learning_rate": 7.631459582642947e-07,
"loss": 0.556,
"step": 13056
},
{
"epoch": 0.83,
"grad_norm": 0.8952832818031311,
"learning_rate": 7.626012431676138e-07,
"loss": 0.6063,
"step": 13057
},
{
"epoch": 0.83,
"grad_norm": 0.8996466994285583,
"learning_rate": 7.620567064930545e-07,
"loss": 0.5752,
"step": 13058
},
{
"epoch": 0.83,
"grad_norm": 0.8489691615104675,
"learning_rate": 7.615123482635433e-07,
"loss": 0.5823,
"step": 13059
},
{
"epoch": 0.83,
"grad_norm": 0.9028809070587158,
"learning_rate": 7.609681685020026e-07,
"loss": 0.5796,
"step": 13060
},
{
"epoch": 0.83,
"grad_norm": 0.8422659039497375,
"learning_rate": 7.604241672313461e-07,
"loss": 0.5435,
"step": 13061
},
{
"epoch": 0.83,
"grad_norm": 0.837838888168335,
"learning_rate": 7.59880344474479e-07,
"loss": 0.5687,
"step": 13062
},
{
"epoch": 0.83,
"grad_norm": 0.8525023460388184,
"learning_rate": 7.593367002543018e-07,
"loss": 0.5606,
"step": 13063
},
{
"epoch": 0.83,
"grad_norm": 0.8720320463180542,
"learning_rate": 7.587932345937016e-07,
"loss": 0.5699,
"step": 13064
},
{
"epoch": 0.83,
"grad_norm": 0.8558526635169983,
"learning_rate": 7.582499475155653e-07,
"loss": 0.6107,
"step": 13065
},
{
"epoch": 0.83,
"grad_norm": 0.8588683009147644,
"learning_rate": 7.577068390427689e-07,
"loss": 0.5271,
"step": 13066
},
{
"epoch": 0.83,
"grad_norm": 0.806747317314148,
"learning_rate": 7.571639091981786e-07,
"loss": 0.5362,
"step": 13067
},
{
"epoch": 0.83,
"grad_norm": 0.9133474826812744,
"learning_rate": 7.566211580046562e-07,
"loss": 0.5963,
"step": 13068
},
{
"epoch": 0.83,
"grad_norm": 0.7889014482498169,
"learning_rate": 7.56078585485055e-07,
"loss": 0.5403,
"step": 13069
},
{
"epoch": 0.83,
"grad_norm": 0.86361163854599,
"learning_rate": 7.555361916622217e-07,
"loss": 0.5825,
"step": 13070
},
{
"epoch": 0.83,
"grad_norm": 0.8512160181999207,
"learning_rate": 7.549939765589942e-07,
"loss": 0.5044,
"step": 13071
},
{
"epoch": 0.83,
"grad_norm": 0.8855159282684326,
"learning_rate": 7.544519401982025e-07,
"loss": 0.5909,
"step": 13072
},
{
"epoch": 0.83,
"grad_norm": 0.9207944273948669,
"learning_rate": 7.539100826026691e-07,
"loss": 0.5993,
"step": 13073
},
{
"epoch": 0.83,
"grad_norm": 0.9316564798355103,
"learning_rate": 7.533684037952133e-07,
"loss": 0.5755,
"step": 13074
},
{
"epoch": 0.83,
"grad_norm": 0.8831668496131897,
"learning_rate": 7.528269037986402e-07,
"loss": 0.6368,
"step": 13075
},
{
"epoch": 0.83,
"grad_norm": 0.9168758988380432,
"learning_rate": 7.522855826357511e-07,
"loss": 0.5728,
"step": 13076
},
{
"epoch": 0.83,
"grad_norm": 0.8036249876022339,
"learning_rate": 7.517444403293394e-07,
"loss": 0.5295,
"step": 13077
},
{
"epoch": 0.83,
"grad_norm": 0.8695041537284851,
"learning_rate": 7.512034769021909e-07,
"loss": 0.592,
"step": 13078
},
{
"epoch": 0.83,
"grad_norm": 0.9398552775382996,
"learning_rate": 7.506626923770843e-07,
"loss": 0.5927,
"step": 13079
},
{
"epoch": 0.83,
"grad_norm": 0.8533617258071899,
"learning_rate": 7.501220867767883e-07,
"loss": 0.5991,
"step": 13080
},
{
"epoch": 0.83,
"grad_norm": 0.877224862575531,
"learning_rate": 7.495816601240664e-07,
"loss": 0.5839,
"step": 13081
},
{
"epoch": 0.83,
"grad_norm": 0.9243265390396118,
"learning_rate": 7.490414124416761e-07,
"loss": 0.6103,
"step": 13082
},
{
"epoch": 0.83,
"grad_norm": 1.0017024278640747,
"learning_rate": 7.485013437523636e-07,
"loss": 0.6144,
"step": 13083
},
{
"epoch": 0.83,
"grad_norm": 0.8907317519187927,
"learning_rate": 7.479614540788687e-07,
"loss": 0.6113,
"step": 13084
},
{
"epoch": 0.83,
"grad_norm": 0.9147844910621643,
"learning_rate": 7.474217434439263e-07,
"loss": 0.5684,
"step": 13085
},
{
"epoch": 0.83,
"grad_norm": 0.8742222785949707,
"learning_rate": 7.468822118702596e-07,
"loss": 0.5424,
"step": 13086
},
{
"epoch": 0.83,
"grad_norm": 0.9334181547164917,
"learning_rate": 7.463428593805894e-07,
"loss": 0.5554,
"step": 13087
},
{
"epoch": 0.83,
"grad_norm": 0.8707894682884216,
"learning_rate": 7.458036859976225e-07,
"loss": 0.6064,
"step": 13088
},
{
"epoch": 0.83,
"grad_norm": 0.9232116341590881,
"learning_rate": 7.452646917440631e-07,
"loss": 0.6251,
"step": 13089
},
{
"epoch": 0.83,
"grad_norm": 0.9443577527999878,
"learning_rate": 7.447258766426063e-07,
"loss": 0.5861,
"step": 13090
},
{
"epoch": 0.83,
"grad_norm": 0.87910395860672,
"learning_rate": 7.441872407159401e-07,
"loss": 0.5628,
"step": 13091
},
{
"epoch": 0.83,
"grad_norm": 0.8710011839866638,
"learning_rate": 7.43648783986744e-07,
"loss": 0.5954,
"step": 13092
},
{
"epoch": 0.83,
"grad_norm": 0.9100737571716309,
"learning_rate": 7.431105064776922e-07,
"loss": 0.5956,
"step": 13093
},
{
"epoch": 0.83,
"grad_norm": 0.8823485970497131,
"learning_rate": 7.425724082114455e-07,
"loss": 0.5534,
"step": 13094
},
{
"epoch": 0.83,
"grad_norm": 0.9108067750930786,
"learning_rate": 7.420344892106674e-07,
"loss": 0.5459,
"step": 13095
},
{
"epoch": 0.83,
"grad_norm": 0.9197466969490051,
"learning_rate": 7.414967494980024e-07,
"loss": 0.5779,
"step": 13096
},
{
"epoch": 0.83,
"grad_norm": 0.8721498847007751,
"learning_rate": 7.40959189096096e-07,
"loss": 0.5026,
"step": 13097
},
{
"epoch": 0.83,
"grad_norm": 0.9107875823974609,
"learning_rate": 7.404218080275816e-07,
"loss": 0.6035,
"step": 13098
},
{
"epoch": 0.83,
"grad_norm": 0.8590791821479797,
"learning_rate": 7.398846063150866e-07,
"loss": 0.5347,
"step": 13099
},
{
"epoch": 0.83,
"grad_norm": 0.874270498752594,
"learning_rate": 7.393475839812314e-07,
"loss": 0.5954,
"step": 13100
},
{
"epoch": 0.83,
"grad_norm": 0.9111903309822083,
"learning_rate": 7.388107410486289e-07,
"loss": 0.5691,
"step": 13101
},
{
"epoch": 0.83,
"grad_norm": 0.9081681370735168,
"learning_rate": 7.3827407753988e-07,
"loss": 0.5675,
"step": 13102
},
{
"epoch": 0.83,
"grad_norm": 0.8183289766311646,
"learning_rate": 7.377375934775865e-07,
"loss": 0.5498,
"step": 13103
},
{
"epoch": 0.83,
"grad_norm": 0.8380873203277588,
"learning_rate": 7.372012888843344e-07,
"loss": 0.5786,
"step": 13104
},
{
"epoch": 0.83,
"grad_norm": 0.8212375044822693,
"learning_rate": 7.366651637827065e-07,
"loss": 0.5647,
"step": 13105
},
{
"epoch": 0.83,
"grad_norm": 0.900518000125885,
"learning_rate": 7.361292181952795e-07,
"loss": 0.5743,
"step": 13106
},
{
"epoch": 0.83,
"grad_norm": 0.9295457601547241,
"learning_rate": 7.355934521446151e-07,
"loss": 0.5577,
"step": 13107
},
{
"epoch": 0.83,
"grad_norm": 0.8961006999015808,
"learning_rate": 7.350578656532776e-07,
"loss": 0.5885,
"step": 13108
},
{
"epoch": 0.83,
"grad_norm": 0.8948516249656677,
"learning_rate": 7.345224587438171e-07,
"loss": 0.5077,
"step": 13109
},
{
"epoch": 0.83,
"grad_norm": 0.9140964150428772,
"learning_rate": 7.339872314387763e-07,
"loss": 0.6131,
"step": 13110
},
{
"epoch": 0.83,
"grad_norm": 0.9755547046661377,
"learning_rate": 7.334521837606934e-07,
"loss": 0.6061,
"step": 13111
},
{
"epoch": 0.83,
"grad_norm": 0.8581327795982361,
"learning_rate": 7.329173157320962e-07,
"loss": 0.5332,
"step": 13112
},
{
"epoch": 0.83,
"grad_norm": 0.9618088603019714,
"learning_rate": 7.323826273755069e-07,
"loss": 0.5948,
"step": 13113
},
{
"epoch": 0.83,
"grad_norm": 0.8937922120094299,
"learning_rate": 7.318481187134408e-07,
"loss": 0.5915,
"step": 13114
},
{
"epoch": 0.83,
"grad_norm": 0.9323161244392395,
"learning_rate": 7.313137897683997e-07,
"loss": 0.6016,
"step": 13115
},
{
"epoch": 0.83,
"grad_norm": 0.8632552623748779,
"learning_rate": 7.30779640562887e-07,
"loss": 0.6145,
"step": 13116
},
{
"epoch": 0.83,
"grad_norm": 0.8710545897483826,
"learning_rate": 7.302456711193928e-07,
"loss": 0.5644,
"step": 13117
},
{
"epoch": 0.83,
"grad_norm": 0.874191164970398,
"learning_rate": 7.297118814603987e-07,
"loss": 0.5579,
"step": 13118
},
{
"epoch": 0.83,
"grad_norm": 0.8017786741256714,
"learning_rate": 7.291782716083823e-07,
"loss": 0.5414,
"step": 13119
},
{
"epoch": 0.83,
"grad_norm": 0.8626580834388733,
"learning_rate": 7.286448415858116e-07,
"loss": 0.5909,
"step": 13120
},
{
"epoch": 0.83,
"grad_norm": 0.8846031427383423,
"learning_rate": 7.281115914151477e-07,
"loss": 0.5291,
"step": 13121
},
{
"epoch": 0.83,
"grad_norm": 0.8800442814826965,
"learning_rate": 7.275785211188441e-07,
"loss": 0.5698,
"step": 13122
},
{
"epoch": 0.83,
"grad_norm": 0.8646133542060852,
"learning_rate": 7.270456307193474e-07,
"loss": 0.5776,
"step": 13123
},
{
"epoch": 0.83,
"grad_norm": 0.9423984289169312,
"learning_rate": 7.265129202390924e-07,
"loss": 0.5374,
"step": 13124
},
{
"epoch": 0.83,
"grad_norm": 0.8401879072189331,
"learning_rate": 7.259803897005141e-07,
"loss": 0.5583,
"step": 13125
},
{
"epoch": 0.83,
"grad_norm": 0.8532096147537231,
"learning_rate": 7.254480391260321e-07,
"loss": 0.5056,
"step": 13126
},
{
"epoch": 0.83,
"grad_norm": 0.8508062958717346,
"learning_rate": 7.249158685380631e-07,
"loss": 0.5793,
"step": 13127
},
{
"epoch": 0.83,
"grad_norm": 0.8456823825836182,
"learning_rate": 7.243838779590151e-07,
"loss": 0.5542,
"step": 13128
},
{
"epoch": 0.83,
"grad_norm": 0.9003103375434875,
"learning_rate": 7.238520674112881e-07,
"loss": 0.5354,
"step": 13129
},
{
"epoch": 0.83,
"grad_norm": 0.8607522249221802,
"learning_rate": 7.233204369172753e-07,
"loss": 0.544,
"step": 13130
},
{
"epoch": 0.83,
"grad_norm": 0.8859104514122009,
"learning_rate": 7.22788986499362e-07,
"loss": 0.5419,
"step": 13131
},
{
"epoch": 0.83,
"grad_norm": 0.9029030799865723,
"learning_rate": 7.222577161799232e-07,
"loss": 0.5825,
"step": 13132
},
{
"epoch": 0.83,
"grad_norm": 0.934764564037323,
"learning_rate": 7.217266259813332e-07,
"loss": 0.5783,
"step": 13133
},
{
"epoch": 0.83,
"grad_norm": 0.8300181031227112,
"learning_rate": 7.211957159259503e-07,
"loss": 0.5394,
"step": 13134
},
{
"epoch": 0.83,
"grad_norm": 0.8454645276069641,
"learning_rate": 7.206649860361314e-07,
"loss": 0.5528,
"step": 13135
},
{
"epoch": 0.83,
"grad_norm": 0.8897960782051086,
"learning_rate": 7.201344363342245e-07,
"loss": 0.5781,
"step": 13136
},
{
"epoch": 0.83,
"grad_norm": 0.8986917734146118,
"learning_rate": 7.196040668425653e-07,
"loss": 0.6028,
"step": 13137
},
{
"epoch": 0.83,
"grad_norm": 0.9327632784843445,
"learning_rate": 7.190738775834894e-07,
"loss": 0.5857,
"step": 13138
},
{
"epoch": 0.83,
"grad_norm": 0.8856915235519409,
"learning_rate": 7.185438685793217e-07,
"loss": 0.5882,
"step": 13139
},
{
"epoch": 0.83,
"grad_norm": 0.9135767221450806,
"learning_rate": 7.180140398523761e-07,
"loss": 0.59,
"step": 13140
},
{
"epoch": 0.83,
"grad_norm": 0.8973036408424377,
"learning_rate": 7.174843914249636e-07,
"loss": 0.5655,
"step": 13141
},
{
"epoch": 0.83,
"grad_norm": 0.8983938694000244,
"learning_rate": 7.169549233193857e-07,
"loss": 0.5778,
"step": 13142
},
{
"epoch": 0.83,
"grad_norm": 0.9242495894432068,
"learning_rate": 7.164256355579363e-07,
"loss": 0.5819,
"step": 13143
},
{
"epoch": 0.83,
"grad_norm": 0.9081816673278809,
"learning_rate": 7.158965281629027e-07,
"loss": 0.5798,
"step": 13144
},
{
"epoch": 0.83,
"grad_norm": 0.9231504201889038,
"learning_rate": 7.153676011565613e-07,
"loss": 0.6053,
"step": 13145
},
{
"epoch": 0.83,
"grad_norm": 0.866088330745697,
"learning_rate": 7.148388545611856e-07,
"loss": 0.5286,
"step": 13146
},
{
"epoch": 0.83,
"grad_norm": 0.8189731240272522,
"learning_rate": 7.143102883990405e-07,
"loss": 0.5759,
"step": 13147
},
{
"epoch": 0.83,
"grad_norm": 0.8492090702056885,
"learning_rate": 7.137819026923786e-07,
"loss": 0.5127,
"step": 13148
},
{
"epoch": 0.83,
"grad_norm": 0.8900519609451294,
"learning_rate": 7.132536974634508e-07,
"loss": 0.5905,
"step": 13149
},
{
"epoch": 0.83,
"grad_norm": 0.8588072657585144,
"learning_rate": 7.127256727344967e-07,
"loss": 0.5479,
"step": 13150
},
{
"epoch": 0.83,
"grad_norm": 0.9344004988670349,
"learning_rate": 7.121978285277503e-07,
"loss": 0.5901,
"step": 13151
},
{
"epoch": 0.83,
"grad_norm": 0.9044827222824097,
"learning_rate": 7.116701648654384e-07,
"loss": 0.5989,
"step": 13152
},
{
"epoch": 0.83,
"grad_norm": 0.8644382953643799,
"learning_rate": 7.11142681769777e-07,
"loss": 0.5589,
"step": 13153
},
{
"epoch": 0.83,
"grad_norm": 0.9335626363754272,
"learning_rate": 7.106153792629761e-07,
"loss": 0.5711,
"step": 13154
},
{
"epoch": 0.83,
"grad_norm": 0.8146085143089294,
"learning_rate": 7.100882573672419e-07,
"loss": 0.5407,
"step": 13155
},
{
"epoch": 0.83,
"grad_norm": 0.8309633731842041,
"learning_rate": 7.095613161047666e-07,
"loss": 0.5615,
"step": 13156
},
{
"epoch": 0.83,
"grad_norm": 0.8940461277961731,
"learning_rate": 7.09034555497739e-07,
"loss": 0.561,
"step": 13157
},
{
"epoch": 0.83,
"grad_norm": 0.9012131690979004,
"learning_rate": 7.085079755683389e-07,
"loss": 0.5582,
"step": 13158
},
{
"epoch": 0.83,
"grad_norm": 0.8765063881874084,
"learning_rate": 7.079815763387393e-07,
"loss": 0.5955,
"step": 13159
},
{
"epoch": 0.83,
"grad_norm": 0.8758644461631775,
"learning_rate": 7.074553578311055e-07,
"loss": 0.5402,
"step": 13160
},
{
"epoch": 0.83,
"grad_norm": 0.8788025975227356,
"learning_rate": 7.06929320067593e-07,
"loss": 0.6192,
"step": 13161
},
{
"epoch": 0.83,
"grad_norm": 0.9614549279212952,
"learning_rate": 7.064034630703515e-07,
"loss": 0.6092,
"step": 13162
},
{
"epoch": 0.83,
"grad_norm": 0.9305884838104248,
"learning_rate": 7.058777868615258e-07,
"loss": 0.602,
"step": 13163
},
{
"epoch": 0.83,
"grad_norm": 0.8970014452934265,
"learning_rate": 7.053522914632466e-07,
"loss": 0.5997,
"step": 13164
},
{
"epoch": 0.83,
"grad_norm": 0.8438460230827332,
"learning_rate": 7.048269768976429e-07,
"loss": 0.5581,
"step": 13165
},
{
"epoch": 0.83,
"grad_norm": 0.9222960472106934,
"learning_rate": 7.043018431868348e-07,
"loss": 0.5482,
"step": 13166
},
{
"epoch": 0.83,
"grad_norm": 0.897331714630127,
"learning_rate": 7.037768903529302e-07,
"loss": 0.6095,
"step": 13167
},
{
"epoch": 0.83,
"grad_norm": 0.8716689348220825,
"learning_rate": 7.032521184180369e-07,
"loss": 0.5955,
"step": 13168
},
{
"epoch": 0.83,
"grad_norm": 0.8186154961585999,
"learning_rate": 7.027275274042489e-07,
"loss": 0.5867,
"step": 13169
},
{
"epoch": 0.83,
"grad_norm": 0.9115201830863953,
"learning_rate": 7.022031173336557e-07,
"loss": 0.5619,
"step": 13170
},
{
"epoch": 0.83,
"grad_norm": 0.8985578417778015,
"learning_rate": 7.016788882283382e-07,
"loss": 0.6085,
"step": 13171
},
{
"epoch": 0.83,
"grad_norm": 0.9288114905357361,
"learning_rate": 7.011548401103696e-07,
"loss": 0.6011,
"step": 13172
},
{
"epoch": 0.83,
"grad_norm": 0.8916085958480835,
"learning_rate": 7.006309730018168e-07,
"loss": 0.5845,
"step": 13173
},
{
"epoch": 0.83,
"grad_norm": 0.8739166855812073,
"learning_rate": 7.001072869247378e-07,
"loss": 0.6088,
"step": 13174
},
{
"epoch": 0.83,
"grad_norm": 0.9117295145988464,
"learning_rate": 6.995837819011808e-07,
"loss": 0.5982,
"step": 13175
},
{
"epoch": 0.83,
"grad_norm": 0.8260350227355957,
"learning_rate": 6.990604579531929e-07,
"loss": 0.5691,
"step": 13176
},
{
"epoch": 0.83,
"grad_norm": 0.940250039100647,
"learning_rate": 6.985373151028058e-07,
"loss": 0.5862,
"step": 13177
},
{
"epoch": 0.83,
"grad_norm": 0.8562113046646118,
"learning_rate": 6.980143533720491e-07,
"loss": 0.5487,
"step": 13178
},
{
"epoch": 0.83,
"grad_norm": 0.8613032698631287,
"learning_rate": 6.974915727829423e-07,
"loss": 0.5633,
"step": 13179
},
{
"epoch": 0.84,
"grad_norm": 0.986914336681366,
"learning_rate": 6.96968973357498e-07,
"loss": 0.6344,
"step": 13180
},
{
"epoch": 0.84,
"grad_norm": 0.8467575311660767,
"learning_rate": 6.964465551177208e-07,
"loss": 0.4884,
"step": 13181
},
{
"epoch": 0.84,
"grad_norm": 0.8608553409576416,
"learning_rate": 6.959243180856096e-07,
"loss": 0.627,
"step": 13182
},
{
"epoch": 0.84,
"grad_norm": 0.8423926830291748,
"learning_rate": 6.954022622831514e-07,
"loss": 0.5243,
"step": 13183
},
{
"epoch": 0.84,
"grad_norm": 0.8840621113777161,
"learning_rate": 6.948803877323296e-07,
"loss": 0.5071,
"step": 13184
},
{
"epoch": 0.84,
"grad_norm": 0.8253465294837952,
"learning_rate": 6.943586944551178e-07,
"loss": 0.571,
"step": 13185
},
{
"epoch": 0.84,
"grad_norm": 0.8736525774002075,
"learning_rate": 6.938371824734835e-07,
"loss": 0.6001,
"step": 13186
},
{
"epoch": 0.84,
"grad_norm": 0.9959997534751892,
"learning_rate": 6.933158518093852e-07,
"loss": 0.6158,
"step": 13187
},
{
"epoch": 0.84,
"grad_norm": 0.9295116066932678,
"learning_rate": 6.927947024847748e-07,
"loss": 0.581,
"step": 13188
},
{
"epoch": 0.84,
"grad_norm": 0.9184585809707642,
"learning_rate": 6.922737345215952e-07,
"loss": 0.544,
"step": 13189
},
{
"epoch": 0.84,
"grad_norm": 0.8298773169517517,
"learning_rate": 6.91752947941785e-07,
"loss": 0.561,
"step": 13190
},
{
"epoch": 0.84,
"grad_norm": 0.8674336075782776,
"learning_rate": 6.912323427672691e-07,
"loss": 0.5629,
"step": 13191
},
{
"epoch": 0.84,
"grad_norm": 0.9086819887161255,
"learning_rate": 6.907119190199706e-07,
"loss": 0.5735,
"step": 13192
},
{
"epoch": 0.84,
"grad_norm": 0.8917360305786133,
"learning_rate": 6.901916767218019e-07,
"loss": 0.5546,
"step": 13193
},
{
"epoch": 0.84,
"grad_norm": 0.8581564426422119,
"learning_rate": 6.896716158946692e-07,
"loss": 0.5619,
"step": 13194
},
{
"epoch": 0.84,
"grad_norm": 0.8573694229125977,
"learning_rate": 6.891517365604705e-07,
"loss": 0.5962,
"step": 13195
},
{
"epoch": 0.84,
"grad_norm": 0.8820661306381226,
"learning_rate": 6.886320387410967e-07,
"loss": 0.6283,
"step": 13196
},
{
"epoch": 0.84,
"grad_norm": 0.8105853796005249,
"learning_rate": 6.881125224584273e-07,
"loss": 0.528,
"step": 13197
},
{
"epoch": 0.84,
"grad_norm": 0.9009973406791687,
"learning_rate": 6.875931877343417e-07,
"loss": 0.5681,
"step": 13198
},
{
"epoch": 0.84,
"grad_norm": 0.8252160549163818,
"learning_rate": 6.870740345907046e-07,
"loss": 0.5771,
"step": 13199
},
{
"epoch": 0.84,
"grad_norm": 0.9308204054832458,
"learning_rate": 6.865550630493756e-07,
"loss": 0.5311,
"step": 13200
},
{
"epoch": 0.84,
"grad_norm": 0.9394121766090393,
"learning_rate": 6.860362731322079e-07,
"loss": 0.618,
"step": 13201
},
{
"epoch": 0.84,
"grad_norm": 0.8921918869018555,
"learning_rate": 6.855176648610457e-07,
"loss": 0.5777,
"step": 13202
},
{
"epoch": 0.84,
"grad_norm": 0.8987441062927246,
"learning_rate": 6.849992382577253e-07,
"loss": 0.5614,
"step": 13203
},
{
"epoch": 0.84,
"grad_norm": 0.8814181089401245,
"learning_rate": 6.844809933440776e-07,
"loss": 0.5644,
"step": 13204
},
{
"epoch": 0.84,
"grad_norm": 0.9095494151115417,
"learning_rate": 6.839629301419204e-07,
"loss": 0.5416,
"step": 13205
},
{
"epoch": 0.84,
"grad_norm": 0.864000678062439,
"learning_rate": 6.83445048673072e-07,
"loss": 0.5482,
"step": 13206
},
{
"epoch": 0.84,
"grad_norm": 0.8674211502075195,
"learning_rate": 6.829273489593352e-07,
"loss": 0.5395,
"step": 13207
},
{
"epoch": 0.84,
"grad_norm": 0.8799319863319397,
"learning_rate": 6.824098310225097e-07,
"loss": 0.5647,
"step": 13208
},
{
"epoch": 0.84,
"grad_norm": 0.9398074150085449,
"learning_rate": 6.818924948843863e-07,
"loss": 0.5973,
"step": 13209
},
{
"epoch": 0.84,
"grad_norm": 0.8925483226776123,
"learning_rate": 6.81375340566749e-07,
"loss": 0.5715,
"step": 13210
},
{
"epoch": 0.84,
"grad_norm": 0.8708029389381409,
"learning_rate": 6.808583680913722e-07,
"loss": 0.5579,
"step": 13211
},
{
"epoch": 0.84,
"grad_norm": 0.8572626113891602,
"learning_rate": 6.803415774800253e-07,
"loss": 0.5613,
"step": 13212
},
{
"epoch": 0.84,
"grad_norm": 0.8568171858787537,
"learning_rate": 6.798249687544667e-07,
"loss": 0.5321,
"step": 13213
},
{
"epoch": 0.84,
"grad_norm": 0.8693404197692871,
"learning_rate": 6.793085419364498e-07,
"loss": 0.555,
"step": 13214
},
{
"epoch": 0.84,
"grad_norm": 0.8741576075553894,
"learning_rate": 6.787922970477196e-07,
"loss": 0.5667,
"step": 13215
},
{
"epoch": 0.84,
"grad_norm": 0.9199385046958923,
"learning_rate": 6.782762341100135e-07,
"loss": 0.6141,
"step": 13216
},
{
"epoch": 0.84,
"grad_norm": 0.8483101725578308,
"learning_rate": 6.777603531450617e-07,
"loss": 0.5482,
"step": 13217
},
{
"epoch": 0.84,
"grad_norm": 0.8694477081298828,
"learning_rate": 6.772446541745836e-07,
"loss": 0.5839,
"step": 13218
},
{
"epoch": 0.84,
"grad_norm": 0.9048340320587158,
"learning_rate": 6.767291372202967e-07,
"loss": 0.5781,
"step": 13219
},
{
"epoch": 0.84,
"grad_norm": 0.9429792761802673,
"learning_rate": 6.762138023039072e-07,
"loss": 0.5974,
"step": 13220
},
{
"epoch": 0.84,
"grad_norm": 0.913020670413971,
"learning_rate": 6.756986494471119e-07,
"loss": 0.6104,
"step": 13221
},
{
"epoch": 0.84,
"grad_norm": 0.8851649761199951,
"learning_rate": 6.751836786716032e-07,
"loss": 0.5967,
"step": 13222
},
{
"epoch": 0.84,
"grad_norm": 0.9294677972793579,
"learning_rate": 6.74668889999065e-07,
"loss": 0.5813,
"step": 13223
},
{
"epoch": 0.84,
"grad_norm": 0.8423077464103699,
"learning_rate": 6.741542834511727e-07,
"loss": 0.5543,
"step": 13224
},
{
"epoch": 0.84,
"grad_norm": 0.8565467000007629,
"learning_rate": 6.736398590495968e-07,
"loss": 0.5139,
"step": 13225
},
{
"epoch": 0.84,
"grad_norm": 0.8920080661773682,
"learning_rate": 6.731256168159939e-07,
"loss": 0.5972,
"step": 13226
},
{
"epoch": 0.84,
"grad_norm": 0.8450667858123779,
"learning_rate": 6.726115567720198e-07,
"loss": 0.5539,
"step": 13227
},
{
"epoch": 0.84,
"grad_norm": 0.901174783706665,
"learning_rate": 6.720976789393202e-07,
"loss": 0.611,
"step": 13228
},
{
"epoch": 0.84,
"grad_norm": 0.8898508548736572,
"learning_rate": 6.71583983339531e-07,
"loss": 0.5798,
"step": 13229
},
{
"epoch": 0.84,
"grad_norm": 0.881693422794342,
"learning_rate": 6.710704699942827e-07,
"loss": 0.5774,
"step": 13230
},
{
"epoch": 0.84,
"grad_norm": 0.8955451846122742,
"learning_rate": 6.705571389251975e-07,
"loss": 0.5217,
"step": 13231
},
{
"epoch": 0.84,
"grad_norm": 0.9116746187210083,
"learning_rate": 6.700439901538902e-07,
"loss": 0.5331,
"step": 13232
},
{
"epoch": 0.84,
"grad_norm": 0.9191250801086426,
"learning_rate": 6.695310237019692e-07,
"loss": 0.5772,
"step": 13233
},
{
"epoch": 0.84,
"grad_norm": 0.9273549914360046,
"learning_rate": 6.690182395910305e-07,
"loss": 0.6408,
"step": 13234
},
{
"epoch": 0.84,
"grad_norm": 0.8469404578208923,
"learning_rate": 6.685056378426663e-07,
"loss": 0.5477,
"step": 13235
},
{
"epoch": 0.84,
"grad_norm": 0.9355968236923218,
"learning_rate": 6.679932184784638e-07,
"loss": 0.5865,
"step": 13236
},
{
"epoch": 0.84,
"grad_norm": 0.865906834602356,
"learning_rate": 6.674809815199962e-07,
"loss": 0.5217,
"step": 13237
},
{
"epoch": 0.84,
"grad_norm": 0.9029650688171387,
"learning_rate": 6.669689269888325e-07,
"loss": 0.5829,
"step": 13238
},
{
"epoch": 0.84,
"grad_norm": 0.8489553332328796,
"learning_rate": 6.664570549065336e-07,
"loss": 0.519,
"step": 13239
},
{
"epoch": 0.84,
"grad_norm": 0.8921743631362915,
"learning_rate": 6.659453652946529e-07,
"loss": 0.5553,
"step": 13240
},
{
"epoch": 0.84,
"grad_norm": 0.8686976432800293,
"learning_rate": 6.654338581747366e-07,
"loss": 0.5974,
"step": 13241
},
{
"epoch": 0.84,
"grad_norm": 0.9544159173965454,
"learning_rate": 6.649225335683213e-07,
"loss": 0.5986,
"step": 13242
},
{
"epoch": 0.84,
"grad_norm": 0.9924260973930359,
"learning_rate": 6.644113914969369e-07,
"loss": 0.6438,
"step": 13243
},
{
"epoch": 0.84,
"grad_norm": 0.8223074674606323,
"learning_rate": 6.639004319821063e-07,
"loss": 0.5702,
"step": 13244
},
{
"epoch": 0.84,
"grad_norm": 0.889176070690155,
"learning_rate": 6.63389655045345e-07,
"loss": 0.5799,
"step": 13245
},
{
"epoch": 0.84,
"grad_norm": 0.9296001195907593,
"learning_rate": 6.628790607081586e-07,
"loss": 0.583,
"step": 13246
},
{
"epoch": 0.84,
"grad_norm": 0.8625611662864685,
"learning_rate": 6.623686489920489e-07,
"loss": 0.5476,
"step": 13247
},
{
"epoch": 0.84,
"grad_norm": 0.9394053220748901,
"learning_rate": 6.61858419918503e-07,
"loss": 0.5582,
"step": 13248
},
{
"epoch": 0.84,
"grad_norm": 0.9077306389808655,
"learning_rate": 6.613483735090104e-07,
"loss": 0.5972,
"step": 13249
},
{
"epoch": 0.84,
"grad_norm": 0.8808714151382446,
"learning_rate": 6.608385097850439e-07,
"loss": 0.5684,
"step": 13250
},
{
"epoch": 0.84,
"grad_norm": 0.9206782579421997,
"learning_rate": 6.603288287680726e-07,
"loss": 0.5882,
"step": 13251
},
{
"epoch": 0.84,
"grad_norm": 0.8908818960189819,
"learning_rate": 6.598193304795575e-07,
"loss": 0.5315,
"step": 13252
},
{
"epoch": 0.84,
"grad_norm": 0.8861278891563416,
"learning_rate": 6.593100149409521e-07,
"loss": 0.6004,
"step": 13253
},
{
"epoch": 0.84,
"grad_norm": 0.9618304967880249,
"learning_rate": 6.588008821737019e-07,
"loss": 0.5732,
"step": 13254
},
{
"epoch": 0.84,
"grad_norm": 0.9097421169281006,
"learning_rate": 6.582919321992459e-07,
"loss": 0.5964,
"step": 13255
},
{
"epoch": 0.84,
"grad_norm": 0.9288156032562256,
"learning_rate": 6.577831650390104e-07,
"loss": 0.5434,
"step": 13256
},
{
"epoch": 0.84,
"grad_norm": 0.9109866619110107,
"learning_rate": 6.572745807144226e-07,
"loss": 0.5443,
"step": 13257
},
{
"epoch": 0.84,
"grad_norm": 0.8743159770965576,
"learning_rate": 6.567661792468944e-07,
"loss": 0.548,
"step": 13258
},
{
"epoch": 0.84,
"grad_norm": 0.8892823457717896,
"learning_rate": 6.562579606578328e-07,
"loss": 0.5657,
"step": 13259
},
{
"epoch": 0.84,
"grad_norm": 0.8872804045677185,
"learning_rate": 6.557499249686377e-07,
"loss": 0.5829,
"step": 13260
},
{
"epoch": 0.84,
"grad_norm": 0.9161667823791504,
"learning_rate": 6.552420722007008e-07,
"loss": 0.5586,
"step": 13261
},
{
"epoch": 0.84,
"grad_norm": 0.9431544542312622,
"learning_rate": 6.547344023754065e-07,
"loss": 0.6225,
"step": 13262
},
{
"epoch": 0.84,
"grad_norm": 0.9163276553153992,
"learning_rate": 6.542269155141306e-07,
"loss": 0.5697,
"step": 13263
},
{
"epoch": 0.84,
"grad_norm": 0.8587558269500732,
"learning_rate": 6.537196116382411e-07,
"loss": 0.5308,
"step": 13264
},
{
"epoch": 0.84,
"grad_norm": 0.9046618938446045,
"learning_rate": 6.532124907690979e-07,
"loss": 0.5743,
"step": 13265
},
{
"epoch": 0.84,
"grad_norm": 0.825258731842041,
"learning_rate": 6.527055529280574e-07,
"loss": 0.5398,
"step": 13266
},
{
"epoch": 0.84,
"grad_norm": 0.9225800037384033,
"learning_rate": 6.521987981364614e-07,
"loss": 0.5864,
"step": 13267
},
{
"epoch": 0.84,
"grad_norm": 0.8566347360610962,
"learning_rate": 6.516922264156495e-07,
"loss": 0.5508,
"step": 13268
},
{
"epoch": 0.84,
"grad_norm": 0.8173342943191528,
"learning_rate": 6.511858377869517e-07,
"loss": 0.5485,
"step": 13269
},
{
"epoch": 0.84,
"grad_norm": 0.8830443620681763,
"learning_rate": 6.506796322716891e-07,
"loss": 0.5276,
"step": 13270
},
{
"epoch": 0.84,
"grad_norm": 0.9083720445632935,
"learning_rate": 6.501736098911787e-07,
"loss": 0.5389,
"step": 13271
},
{
"epoch": 0.84,
"grad_norm": 0.8697338104248047,
"learning_rate": 6.496677706667243e-07,
"loss": 0.5826,
"step": 13272
},
{
"epoch": 0.84,
"grad_norm": 0.8422214984893799,
"learning_rate": 6.491621146196253e-07,
"loss": 0.5484,
"step": 13273
},
{
"epoch": 0.84,
"grad_norm": 0.8826960921287537,
"learning_rate": 6.486566417711765e-07,
"loss": 0.557,
"step": 13274
},
{
"epoch": 0.84,
"grad_norm": 0.8876155614852905,
"learning_rate": 6.481513521426581e-07,
"loss": 0.5637,
"step": 13275
},
{
"epoch": 0.84,
"grad_norm": 0.8074238896369934,
"learning_rate": 6.476462457553473e-07,
"loss": 0.5298,
"step": 13276
},
{
"epoch": 0.84,
"grad_norm": 0.9412943124771118,
"learning_rate": 6.471413226305134e-07,
"loss": 0.5909,
"step": 13277
},
{
"epoch": 0.84,
"grad_norm": 0.9044212102890015,
"learning_rate": 6.466365827894133e-07,
"loss": 0.603,
"step": 13278
},
{
"epoch": 0.84,
"grad_norm": 0.8331887722015381,
"learning_rate": 6.461320262533055e-07,
"loss": 0.5901,
"step": 13279
},
{
"epoch": 0.84,
"grad_norm": 0.8750473856925964,
"learning_rate": 6.456276530434302e-07,
"loss": 0.5648,
"step": 13280
},
{
"epoch": 0.84,
"grad_norm": 0.91391921043396,
"learning_rate": 6.451234631810271e-07,
"loss": 0.5972,
"step": 13281
},
{
"epoch": 0.84,
"grad_norm": 0.9085570573806763,
"learning_rate": 6.446194566873254e-07,
"loss": 0.6251,
"step": 13282
},
{
"epoch": 0.84,
"grad_norm": 0.8852720260620117,
"learning_rate": 6.441156335835474e-07,
"loss": 0.5953,
"step": 13283
},
{
"epoch": 0.84,
"grad_norm": 0.82054603099823,
"learning_rate": 6.436119938909069e-07,
"loss": 0.534,
"step": 13284
},
{
"epoch": 0.84,
"grad_norm": 0.886782169342041,
"learning_rate": 6.431085376306112e-07,
"loss": 0.5974,
"step": 13285
},
{
"epoch": 0.84,
"grad_norm": 0.8668603897094727,
"learning_rate": 6.426052648238568e-07,
"loss": 0.5656,
"step": 13286
},
{
"epoch": 0.84,
"grad_norm": 0.8825658559799194,
"learning_rate": 6.421021754918383e-07,
"loss": 0.5434,
"step": 13287
},
{
"epoch": 0.84,
"grad_norm": 0.899597704410553,
"learning_rate": 6.415992696557361e-07,
"loss": 0.5724,
"step": 13288
},
{
"epoch": 0.84,
"grad_norm": 0.8868544101715088,
"learning_rate": 6.41096547336727e-07,
"loss": 0.569,
"step": 13289
},
{
"epoch": 0.84,
"grad_norm": 0.8770740628242493,
"learning_rate": 6.405940085559797e-07,
"loss": 0.5501,
"step": 13290
},
{
"epoch": 0.84,
"grad_norm": 0.8351693153381348,
"learning_rate": 6.400916533346518e-07,
"loss": 0.4812,
"step": 13291
},
{
"epoch": 0.84,
"grad_norm": 0.9115918278694153,
"learning_rate": 6.39589481693898e-07,
"loss": 0.5842,
"step": 13292
},
{
"epoch": 0.84,
"grad_norm": 0.8377058506011963,
"learning_rate": 6.390874936548635e-07,
"loss": 0.5911,
"step": 13293
},
{
"epoch": 0.84,
"grad_norm": 0.8732972145080566,
"learning_rate": 6.385856892386826e-07,
"loss": 0.5937,
"step": 13294
},
{
"epoch": 0.84,
"grad_norm": 0.9321759939193726,
"learning_rate": 6.380840684664869e-07,
"loss": 0.5681,
"step": 13295
},
{
"epoch": 0.84,
"grad_norm": 0.9192104339599609,
"learning_rate": 6.375826313593963e-07,
"loss": 0.6206,
"step": 13296
},
{
"epoch": 0.84,
"grad_norm": 0.9148771166801453,
"learning_rate": 6.37081377938526e-07,
"loss": 0.6008,
"step": 13297
},
{
"epoch": 0.84,
"grad_norm": 0.9211153984069824,
"learning_rate": 6.365803082249822e-07,
"loss": 0.5656,
"step": 13298
},
{
"epoch": 0.84,
"grad_norm": 0.8448777794837952,
"learning_rate": 6.360794222398603e-07,
"loss": 0.5574,
"step": 13299
},
{
"epoch": 0.84,
"grad_norm": 0.8851933479309082,
"learning_rate": 6.35578720004254e-07,
"loss": 0.601,
"step": 13300
},
{
"epoch": 0.84,
"grad_norm": 0.9539099335670471,
"learning_rate": 6.350782015392459e-07,
"loss": 0.6214,
"step": 13301
},
{
"epoch": 0.84,
"grad_norm": 0.9478552341461182,
"learning_rate": 6.345778668659097e-07,
"loss": 0.6167,
"step": 13302
},
{
"epoch": 0.84,
"grad_norm": 0.9490789175033569,
"learning_rate": 6.34077716005313e-07,
"loss": 0.5713,
"step": 13303
},
{
"epoch": 0.84,
"grad_norm": 0.9128775000572205,
"learning_rate": 6.335777489785161e-07,
"loss": 0.5915,
"step": 13304
},
{
"epoch": 0.84,
"grad_norm": 0.8613923788070679,
"learning_rate": 6.3307796580657e-07,
"loss": 0.6341,
"step": 13305
},
{
"epoch": 0.84,
"grad_norm": 0.9108010530471802,
"learning_rate": 6.325783665105206e-07,
"loss": 0.5904,
"step": 13306
},
{
"epoch": 0.84,
"grad_norm": 0.8792107701301575,
"learning_rate": 6.320789511114022e-07,
"loss": 0.5576,
"step": 13307
},
{
"epoch": 0.84,
"grad_norm": 0.8667570352554321,
"learning_rate": 6.315797196302432e-07,
"loss": 0.5332,
"step": 13308
},
{
"epoch": 0.84,
"grad_norm": 0.8885064721107483,
"learning_rate": 6.310806720880675e-07,
"loss": 0.5781,
"step": 13309
},
{
"epoch": 0.84,
"grad_norm": 0.8899162411689758,
"learning_rate": 6.305818085058852e-07,
"loss": 0.5392,
"step": 13310
},
{
"epoch": 0.84,
"grad_norm": 0.9016544222831726,
"learning_rate": 6.300831289047027e-07,
"loss": 0.5896,
"step": 13311
},
{
"epoch": 0.84,
"grad_norm": 0.8984227776527405,
"learning_rate": 6.295846333055184e-07,
"loss": 0.561,
"step": 13312
},
{
"epoch": 0.84,
"grad_norm": 0.898589015007019,
"learning_rate": 6.290863217293214e-07,
"loss": 0.5771,
"step": 13313
},
{
"epoch": 0.84,
"grad_norm": 0.9066430330276489,
"learning_rate": 6.285881941970951e-07,
"loss": 0.5539,
"step": 13314
},
{
"epoch": 0.84,
"grad_norm": 0.8809421062469482,
"learning_rate": 6.280902507298115e-07,
"loss": 0.5963,
"step": 13315
},
{
"epoch": 0.84,
"grad_norm": 0.9171636700630188,
"learning_rate": 6.275924913484377e-07,
"loss": 0.575,
"step": 13316
},
{
"epoch": 0.84,
"grad_norm": 0.8545477390289307,
"learning_rate": 6.270949160739359e-07,
"loss": 0.5312,
"step": 13317
},
{
"epoch": 0.84,
"grad_norm": 0.8714274764060974,
"learning_rate": 6.265975249272544e-07,
"loss": 0.5371,
"step": 13318
},
{
"epoch": 0.84,
"grad_norm": 0.9174915552139282,
"learning_rate": 6.261003179293368e-07,
"loss": 0.5563,
"step": 13319
},
{
"epoch": 0.84,
"grad_norm": 0.9050828218460083,
"learning_rate": 6.256032951011188e-07,
"loss": 0.5948,
"step": 13320
},
{
"epoch": 0.84,
"grad_norm": 0.9102218747138977,
"learning_rate": 6.25106456463529e-07,
"loss": 0.6076,
"step": 13321
},
{
"epoch": 0.84,
"grad_norm": 0.8744686245918274,
"learning_rate": 6.246098020374869e-07,
"loss": 0.6083,
"step": 13322
},
{
"epoch": 0.84,
"grad_norm": 0.8967841267585754,
"learning_rate": 6.241133318439063e-07,
"loss": 0.6014,
"step": 13323
},
{
"epoch": 0.84,
"grad_norm": 0.9219756722450256,
"learning_rate": 6.236170459036894e-07,
"loss": 0.5299,
"step": 13324
},
{
"epoch": 0.84,
"grad_norm": 0.958886981010437,
"learning_rate": 6.23120944237735e-07,
"loss": 0.6052,
"step": 13325
},
{
"epoch": 0.84,
"grad_norm": 0.8341507315635681,
"learning_rate": 6.226250268669309e-07,
"loss": 0.5881,
"step": 13326
},
{
"epoch": 0.84,
"grad_norm": 0.885211169719696,
"learning_rate": 6.221292938121598e-07,
"loss": 0.6086,
"step": 13327
},
{
"epoch": 0.84,
"grad_norm": 0.8747490644454956,
"learning_rate": 6.216337450942955e-07,
"loss": 0.5505,
"step": 13328
},
{
"epoch": 0.84,
"grad_norm": 0.8842592835426331,
"learning_rate": 6.211383807342008e-07,
"loss": 0.5602,
"step": 13329
},
{
"epoch": 0.84,
"grad_norm": 0.9496366381645203,
"learning_rate": 6.206432007527368e-07,
"loss": 0.5424,
"step": 13330
},
{
"epoch": 0.84,
"grad_norm": 0.9109143018722534,
"learning_rate": 6.201482051707542e-07,
"loss": 0.5971,
"step": 13331
},
{
"epoch": 0.84,
"grad_norm": 0.8485182523727417,
"learning_rate": 6.196533940090932e-07,
"loss": 0.5543,
"step": 13332
},
{
"epoch": 0.84,
"grad_norm": 1.0051995515823364,
"learning_rate": 6.191587672885896e-07,
"loss": 0.5688,
"step": 13333
},
{
"epoch": 0.84,
"grad_norm": 0.8793126940727234,
"learning_rate": 6.186643250300706e-07,
"loss": 0.5992,
"step": 13334
},
{
"epoch": 0.84,
"grad_norm": 0.9340550303459167,
"learning_rate": 6.18170067254355e-07,
"loss": 0.5385,
"step": 13335
},
{
"epoch": 0.84,
"grad_norm": 0.9096164107322693,
"learning_rate": 6.176759939822557e-07,
"loss": 0.551,
"step": 13336
},
{
"epoch": 0.84,
"grad_norm": 0.9081304669380188,
"learning_rate": 6.171821052345744e-07,
"loss": 0.5687,
"step": 13337
},
{
"epoch": 0.85,
"grad_norm": 0.943519651889801,
"learning_rate": 6.166884010321072e-07,
"loss": 0.6095,
"step": 13338
},
{
"epoch": 0.85,
"grad_norm": 0.8517118096351624,
"learning_rate": 6.161948813956447e-07,
"loss": 0.6042,
"step": 13339
},
{
"epoch": 0.85,
"grad_norm": 0.8399627804756165,
"learning_rate": 6.157015463459648e-07,
"loss": 0.5601,
"step": 13340
},
{
"epoch": 0.85,
"grad_norm": 0.8427531123161316,
"learning_rate": 6.152083959038407e-07,
"loss": 0.5183,
"step": 13341
},
{
"epoch": 0.85,
"grad_norm": 0.9020541310310364,
"learning_rate": 6.147154300900377e-07,
"loss": 0.5783,
"step": 13342
},
{
"epoch": 0.85,
"grad_norm": 0.9388177990913391,
"learning_rate": 6.142226489253122e-07,
"loss": 0.58,
"step": 13343
},
{
"epoch": 0.85,
"grad_norm": 0.8787881135940552,
"learning_rate": 6.137300524304151e-07,
"loss": 0.5774,
"step": 13344
},
{
"epoch": 0.85,
"grad_norm": 0.8807479739189148,
"learning_rate": 6.132376406260865e-07,
"loss": 0.5687,
"step": 13345
},
{
"epoch": 0.85,
"grad_norm": 0.8714962601661682,
"learning_rate": 6.127454135330585e-07,
"loss": 0.6012,
"step": 13346
},
{
"epoch": 0.85,
"grad_norm": 0.8867830038070679,
"learning_rate": 6.122533711720613e-07,
"loss": 0.5601,
"step": 13347
},
{
"epoch": 0.85,
"grad_norm": 0.9229559898376465,
"learning_rate": 6.1176151356381e-07,
"loss": 0.6503,
"step": 13348
},
{
"epoch": 0.85,
"grad_norm": 0.8665587902069092,
"learning_rate": 6.112698407290158e-07,
"loss": 0.6234,
"step": 13349
},
{
"epoch": 0.85,
"grad_norm": 0.8939769864082336,
"learning_rate": 6.107783526883809e-07,
"loss": 0.601,
"step": 13350
},
{
"epoch": 0.85,
"grad_norm": 0.8392643332481384,
"learning_rate": 6.102870494626006e-07,
"loss": 0.548,
"step": 13351
},
{
"epoch": 0.85,
"grad_norm": 0.9490659236907959,
"learning_rate": 6.097959310723633e-07,
"loss": 0.6169,
"step": 13352
},
{
"epoch": 0.85,
"grad_norm": 0.8439939618110657,
"learning_rate": 6.093049975383458e-07,
"loss": 0.5877,
"step": 13353
},
{
"epoch": 0.85,
"grad_norm": 0.9144013524055481,
"learning_rate": 6.08814248881221e-07,
"loss": 0.5566,
"step": 13354
},
{
"epoch": 0.85,
"grad_norm": 0.9243726134300232,
"learning_rate": 6.083236851216517e-07,
"loss": 0.5481,
"step": 13355
},
{
"epoch": 0.85,
"grad_norm": 0.9642614722251892,
"learning_rate": 6.078333062802949e-07,
"loss": 0.6463,
"step": 13356
},
{
"epoch": 0.85,
"grad_norm": 0.959270715713501,
"learning_rate": 6.073431123777984e-07,
"loss": 0.6111,
"step": 13357
},
{
"epoch": 0.85,
"grad_norm": 0.894008219242096,
"learning_rate": 6.068531034348035e-07,
"loss": 0.5951,
"step": 13358
},
{
"epoch": 0.85,
"grad_norm": 0.8639335632324219,
"learning_rate": 6.063632794719399e-07,
"loss": 0.5667,
"step": 13359
},
{
"epoch": 0.85,
"grad_norm": 0.9120550751686096,
"learning_rate": 6.058736405098359e-07,
"loss": 0.6197,
"step": 13360
},
{
"epoch": 0.85,
"grad_norm": 0.9336058497428894,
"learning_rate": 6.053841865691063e-07,
"loss": 0.6587,
"step": 13361
},
{
"epoch": 0.85,
"grad_norm": 0.8768007159233093,
"learning_rate": 6.048949176703606e-07,
"loss": 0.5653,
"step": 13362
},
{
"epoch": 0.85,
"grad_norm": 0.8573430180549622,
"learning_rate": 6.044058338342002e-07,
"loss": 0.5534,
"step": 13363
},
{
"epoch": 0.85,
"grad_norm": 0.8514514565467834,
"learning_rate": 6.039169350812191e-07,
"loss": 0.587,
"step": 13364
},
{
"epoch": 0.85,
"grad_norm": 0.9030587673187256,
"learning_rate": 6.034282214320031e-07,
"loss": 0.5804,
"step": 13365
},
{
"epoch": 0.85,
"grad_norm": 0.9698714017868042,
"learning_rate": 6.029396929071313e-07,
"loss": 0.6393,
"step": 13366
},
{
"epoch": 0.85,
"grad_norm": 0.9271089434623718,
"learning_rate": 6.024513495271705e-07,
"loss": 0.5814,
"step": 13367
},
{
"epoch": 0.85,
"grad_norm": 0.8566939234733582,
"learning_rate": 6.019631913126877e-07,
"loss": 0.4997,
"step": 13368
},
{
"epoch": 0.85,
"grad_norm": 0.9297276139259338,
"learning_rate": 6.014752182842343e-07,
"loss": 0.5583,
"step": 13369
},
{
"epoch": 0.85,
"grad_norm": 0.8690567016601562,
"learning_rate": 6.009874304623576e-07,
"loss": 0.5628,
"step": 13370
},
{
"epoch": 0.85,
"grad_norm": 0.8401360511779785,
"learning_rate": 6.004998278675988e-07,
"loss": 0.5212,
"step": 13371
},
{
"epoch": 0.85,
"grad_norm": 0.9174624681472778,
"learning_rate": 6.000124105204847e-07,
"loss": 0.5717,
"step": 13372
},
{
"epoch": 0.85,
"grad_norm": 0.9405276775360107,
"learning_rate": 5.995251784415435e-07,
"loss": 0.5852,
"step": 13373
},
{
"epoch": 0.85,
"grad_norm": 0.8843702673912048,
"learning_rate": 5.990381316512894e-07,
"loss": 0.5789,
"step": 13374
},
{
"epoch": 0.85,
"grad_norm": 0.9235939383506775,
"learning_rate": 5.985512701702284e-07,
"loss": 0.5835,
"step": 13375
},
{
"epoch": 0.85,
"grad_norm": 0.8629280924797058,
"learning_rate": 5.980645940188623e-07,
"loss": 0.5666,
"step": 13376
},
{
"epoch": 0.85,
"grad_norm": 0.9397252202033997,
"learning_rate": 5.975781032176831e-07,
"loss": 0.5569,
"step": 13377
},
{
"epoch": 0.85,
"grad_norm": 0.937901496887207,
"learning_rate": 5.970917977871749e-07,
"loss": 0.5956,
"step": 13378
},
{
"epoch": 0.85,
"grad_norm": 0.87679123878479,
"learning_rate": 5.966056777478152e-07,
"loss": 0.5403,
"step": 13379
},
{
"epoch": 0.85,
"grad_norm": 0.9953281283378601,
"learning_rate": 5.961197431200705e-07,
"loss": 0.5968,
"step": 13380
},
{
"epoch": 0.85,
"grad_norm": 0.9243939518928528,
"learning_rate": 5.956339939244044e-07,
"loss": 0.5376,
"step": 13381
},
{
"epoch": 0.85,
"grad_norm": 0.8928592205047607,
"learning_rate": 5.951484301812699e-07,
"loss": 0.5919,
"step": 13382
},
{
"epoch": 0.85,
"grad_norm": 0.9274805188179016,
"learning_rate": 5.946630519111107e-07,
"loss": 0.5901,
"step": 13383
},
{
"epoch": 0.85,
"grad_norm": 0.9007667303085327,
"learning_rate": 5.941778591343656e-07,
"loss": 0.5883,
"step": 13384
},
{
"epoch": 0.85,
"grad_norm": 0.8749024868011475,
"learning_rate": 5.936928518714641e-07,
"loss": 0.5715,
"step": 13385
},
{
"epoch": 0.85,
"grad_norm": 0.8843820691108704,
"learning_rate": 5.932080301428278e-07,
"loss": 0.5757,
"step": 13386
},
{
"epoch": 0.85,
"grad_norm": 0.88556969165802,
"learning_rate": 5.927233939688714e-07,
"loss": 0.6128,
"step": 13387
},
{
"epoch": 0.85,
"grad_norm": 0.8423168659210205,
"learning_rate": 5.922389433700021e-07,
"loss": 0.5531,
"step": 13388
},
{
"epoch": 0.85,
"grad_norm": 0.8334605693817139,
"learning_rate": 5.917546783666156e-07,
"loss": 0.5426,
"step": 13389
},
{
"epoch": 0.85,
"grad_norm": 0.9283615946769714,
"learning_rate": 5.912705989791062e-07,
"loss": 0.5903,
"step": 13390
},
{
"epoch": 0.85,
"grad_norm": 0.8497453927993774,
"learning_rate": 5.907867052278543e-07,
"loss": 0.5435,
"step": 13391
},
{
"epoch": 0.85,
"grad_norm": 0.9120060205459595,
"learning_rate": 5.903029971332353e-07,
"loss": 0.6199,
"step": 13392
},
{
"epoch": 0.85,
"grad_norm": 0.912470817565918,
"learning_rate": 5.898194747156171e-07,
"loss": 0.6023,
"step": 13393
},
{
"epoch": 0.85,
"grad_norm": 0.9185166358947754,
"learning_rate": 5.893361379953588e-07,
"loss": 0.5856,
"step": 13394
},
{
"epoch": 0.85,
"grad_norm": 0.9453598260879517,
"learning_rate": 5.888529869928122e-07,
"loss": 0.6039,
"step": 13395
},
{
"epoch": 0.85,
"grad_norm": 0.9415664672851562,
"learning_rate": 5.883700217283223e-07,
"loss": 0.5434,
"step": 13396
},
{
"epoch": 0.85,
"grad_norm": 0.9080526232719421,
"learning_rate": 5.878872422222215e-07,
"loss": 0.5224,
"step": 13397
},
{
"epoch": 0.85,
"grad_norm": 0.9032747745513916,
"learning_rate": 5.874046484948426e-07,
"loss": 0.6058,
"step": 13398
},
{
"epoch": 0.85,
"grad_norm": 0.9231809377670288,
"learning_rate": 5.869222405665026e-07,
"loss": 0.5567,
"step": 13399
},
{
"epoch": 0.85,
"grad_norm": 0.9746513366699219,
"learning_rate": 5.864400184575153e-07,
"loss": 0.5836,
"step": 13400
},
{
"epoch": 0.85,
"grad_norm": 0.927158534526825,
"learning_rate": 5.859579821881855e-07,
"loss": 0.6028,
"step": 13401
},
{
"epoch": 0.85,
"grad_norm": 0.8797077536582947,
"learning_rate": 5.854761317788082e-07,
"loss": 0.5583,
"step": 13402
},
{
"epoch": 0.85,
"grad_norm": 0.8659250736236572,
"learning_rate": 5.849944672496749e-07,
"loss": 0.6296,
"step": 13403
},
{
"epoch": 0.85,
"grad_norm": 0.9689622521400452,
"learning_rate": 5.845129886210671e-07,
"loss": 0.6281,
"step": 13404
},
{
"epoch": 0.85,
"grad_norm": 0.8522788286209106,
"learning_rate": 5.840316959132558e-07,
"loss": 0.5162,
"step": 13405
},
{
"epoch": 0.85,
"grad_norm": 0.904559850692749,
"learning_rate": 5.835505891465076e-07,
"loss": 0.5924,
"step": 13406
},
{
"epoch": 0.85,
"grad_norm": 0.8650006651878357,
"learning_rate": 5.830696683410802e-07,
"loss": 0.5552,
"step": 13407
},
{
"epoch": 0.85,
"grad_norm": 0.8110765218734741,
"learning_rate": 5.825889335172241e-07,
"loss": 0.5428,
"step": 13408
},
{
"epoch": 0.85,
"grad_norm": 0.8690059185028076,
"learning_rate": 5.821083846951819e-07,
"loss": 0.6166,
"step": 13409
},
{
"epoch": 0.85,
"grad_norm": 0.8721504807472229,
"learning_rate": 5.816280218951847e-07,
"loss": 0.5206,
"step": 13410
},
{
"epoch": 0.85,
"grad_norm": 0.8614574074745178,
"learning_rate": 5.811478451374625e-07,
"loss": 0.563,
"step": 13411
},
{
"epoch": 0.85,
"grad_norm": 0.9383098483085632,
"learning_rate": 5.806678544422334e-07,
"loss": 0.6108,
"step": 13412
},
{
"epoch": 0.85,
"grad_norm": 0.8840879201889038,
"learning_rate": 5.801880498297057e-07,
"loss": 0.57,
"step": 13413
},
{
"epoch": 0.85,
"grad_norm": 0.8915720582008362,
"learning_rate": 5.797084313200846e-07,
"loss": 0.5684,
"step": 13414
},
{
"epoch": 0.85,
"grad_norm": 0.8662636876106262,
"learning_rate": 5.792289989335637e-07,
"loss": 0.6,
"step": 13415
},
{
"epoch": 0.85,
"grad_norm": 0.9727985858917236,
"learning_rate": 5.787497526903313e-07,
"loss": 0.6059,
"step": 13416
},
{
"epoch": 0.85,
"grad_norm": 0.8884052634239197,
"learning_rate": 5.782706926105674e-07,
"loss": 0.6134,
"step": 13417
},
{
"epoch": 0.85,
"grad_norm": 0.8743575215339661,
"learning_rate": 5.777918187144416e-07,
"loss": 0.5764,
"step": 13418
},
{
"epoch": 0.85,
"grad_norm": 0.9156510233879089,
"learning_rate": 5.773131310221169e-07,
"loss": 0.6167,
"step": 13419
},
{
"epoch": 0.85,
"grad_norm": 0.898995578289032,
"learning_rate": 5.768346295537536e-07,
"loss": 0.6059,
"step": 13420
},
{
"epoch": 0.85,
"grad_norm": 0.8932662606239319,
"learning_rate": 5.76356314329496e-07,
"loss": 0.6145,
"step": 13421
},
{
"epoch": 0.85,
"grad_norm": 0.9043698310852051,
"learning_rate": 5.758781853694845e-07,
"loss": 0.6164,
"step": 13422
},
{
"epoch": 0.85,
"grad_norm": 0.8941948413848877,
"learning_rate": 5.754002426938532e-07,
"loss": 0.5835,
"step": 13423
},
{
"epoch": 0.85,
"grad_norm": 0.871859073638916,
"learning_rate": 5.749224863227249e-07,
"loss": 0.5474,
"step": 13424
},
{
"epoch": 0.85,
"grad_norm": 0.9060640335083008,
"learning_rate": 5.744449162762183e-07,
"loss": 0.5546,
"step": 13425
},
{
"epoch": 0.85,
"grad_norm": 0.9025922417640686,
"learning_rate": 5.739675325744398e-07,
"loss": 0.5765,
"step": 13426
},
{
"epoch": 0.85,
"grad_norm": 0.9106086492538452,
"learning_rate": 5.734903352374904e-07,
"loss": 0.5614,
"step": 13427
},
{
"epoch": 0.85,
"grad_norm": 0.8653062582015991,
"learning_rate": 5.730133242854663e-07,
"loss": 0.5451,
"step": 13428
},
{
"epoch": 0.85,
"grad_norm": 0.8670951724052429,
"learning_rate": 5.725364997384498e-07,
"loss": 0.5791,
"step": 13429
},
{
"epoch": 0.85,
"grad_norm": 0.8815758228302002,
"learning_rate": 5.720598616165196e-07,
"loss": 0.5999,
"step": 13430
},
{
"epoch": 0.85,
"grad_norm": 0.8936425447463989,
"learning_rate": 5.715834099397455e-07,
"loss": 0.5746,
"step": 13431
},
{
"epoch": 0.85,
"grad_norm": 0.8447661995887756,
"learning_rate": 5.711071447281868e-07,
"loss": 0.5322,
"step": 13432
},
{
"epoch": 0.85,
"grad_norm": 0.8403939008712769,
"learning_rate": 5.70631066001901e-07,
"loss": 0.5512,
"step": 13433
},
{
"epoch": 0.85,
"grad_norm": 0.8799472451210022,
"learning_rate": 5.701551737809319e-07,
"loss": 0.5443,
"step": 13434
},
{
"epoch": 0.85,
"grad_norm": 0.9245263934135437,
"learning_rate": 5.696794680853179e-07,
"loss": 0.558,
"step": 13435
},
{
"epoch": 0.85,
"grad_norm": 0.8561593890190125,
"learning_rate": 5.692039489350892e-07,
"loss": 0.5743,
"step": 13436
},
{
"epoch": 0.85,
"grad_norm": 0.8703195452690125,
"learning_rate": 5.687286163502687e-07,
"loss": 0.5518,
"step": 13437
},
{
"epoch": 0.85,
"grad_norm": 0.9124912619590759,
"learning_rate": 5.682534703508713e-07,
"loss": 0.5345,
"step": 13438
},
{
"epoch": 0.85,
"grad_norm": 0.9110020995140076,
"learning_rate": 5.67778510956904e-07,
"loss": 0.5506,
"step": 13439
},
{
"epoch": 0.85,
"grad_norm": 0.8271638751029968,
"learning_rate": 5.673037381883634e-07,
"loss": 0.4917,
"step": 13440
},
{
"epoch": 0.85,
"grad_norm": 0.8652800917625427,
"learning_rate": 5.668291520652436e-07,
"loss": 0.5618,
"step": 13441
},
{
"epoch": 0.85,
"grad_norm": 0.8255113959312439,
"learning_rate": 5.663547526075258e-07,
"loss": 0.5208,
"step": 13442
},
{
"epoch": 0.85,
"grad_norm": 0.8780609965324402,
"learning_rate": 5.658805398351858e-07,
"loss": 0.6078,
"step": 13443
},
{
"epoch": 0.85,
"grad_norm": 0.8495383858680725,
"learning_rate": 5.654065137681907e-07,
"loss": 0.5683,
"step": 13444
},
{
"epoch": 0.85,
"grad_norm": 0.9232254028320312,
"learning_rate": 5.64932674426501e-07,
"loss": 0.6365,
"step": 13445
},
{
"epoch": 0.85,
"grad_norm": 0.8610829710960388,
"learning_rate": 5.644590218300672e-07,
"loss": 0.5327,
"step": 13446
},
{
"epoch": 0.85,
"grad_norm": 0.8897087574005127,
"learning_rate": 5.639855559988356e-07,
"loss": 0.5343,
"step": 13447
},
{
"epoch": 0.85,
"grad_norm": 0.867492139339447,
"learning_rate": 5.63512276952739e-07,
"loss": 0.6033,
"step": 13448
},
{
"epoch": 0.85,
"grad_norm": 0.9001726508140564,
"learning_rate": 5.630391847117073e-07,
"loss": 0.6116,
"step": 13449
},
{
"epoch": 0.85,
"grad_norm": 0.9186358451843262,
"learning_rate": 5.625662792956604e-07,
"loss": 0.5888,
"step": 13450
},
{
"epoch": 0.85,
"grad_norm": 0.8732519745826721,
"learning_rate": 5.620935607245109e-07,
"loss": 0.5883,
"step": 13451
},
{
"epoch": 0.85,
"grad_norm": 0.8616448044776917,
"learning_rate": 5.616210290181628e-07,
"loss": 0.5351,
"step": 13452
},
{
"epoch": 0.85,
"grad_norm": 0.9044156074523926,
"learning_rate": 5.611486841965136e-07,
"loss": 0.5497,
"step": 13453
},
{
"epoch": 0.85,
"grad_norm": 0.8665462732315063,
"learning_rate": 5.606765262794512e-07,
"loss": 0.5753,
"step": 13454
},
{
"epoch": 0.85,
"grad_norm": 0.9290836453437805,
"learning_rate": 5.602045552868585e-07,
"loss": 0.5808,
"step": 13455
},
{
"epoch": 0.85,
"grad_norm": 0.8570681810379028,
"learning_rate": 5.597327712386058e-07,
"loss": 0.6147,
"step": 13456
},
{
"epoch": 0.85,
"grad_norm": 0.8335081338882446,
"learning_rate": 5.592611741545594e-07,
"loss": 0.5146,
"step": 13457
},
{
"epoch": 0.85,
"grad_norm": 0.8507091999053955,
"learning_rate": 5.58789764054577e-07,
"loss": 0.5839,
"step": 13458
},
{
"epoch": 0.85,
"grad_norm": 0.8704282641410828,
"learning_rate": 5.583185409585079e-07,
"loss": 0.5506,
"step": 13459
},
{
"epoch": 0.85,
"grad_norm": 0.9266949892044067,
"learning_rate": 5.578475048861931e-07,
"loss": 0.5791,
"step": 13460
},
{
"epoch": 0.85,
"grad_norm": 0.8802145719528198,
"learning_rate": 5.573766558574684e-07,
"loss": 0.5466,
"step": 13461
},
{
"epoch": 0.85,
"grad_norm": 0.7950432300567627,
"learning_rate": 5.569059938921551e-07,
"loss": 0.5532,
"step": 13462
},
{
"epoch": 0.85,
"grad_norm": 0.850308895111084,
"learning_rate": 5.564355190100768e-07,
"loss": 0.5333,
"step": 13463
},
{
"epoch": 0.85,
"grad_norm": 0.8470205664634705,
"learning_rate": 5.559652312310393e-07,
"loss": 0.5449,
"step": 13464
},
{
"epoch": 0.85,
"grad_norm": 0.8567230701446533,
"learning_rate": 5.554951305748462e-07,
"loss": 0.5504,
"step": 13465
},
{
"epoch": 0.85,
"grad_norm": 0.8885741829872131,
"learning_rate": 5.550252170612924e-07,
"loss": 0.5441,
"step": 13466
},
{
"epoch": 0.85,
"grad_norm": 0.9018322229385376,
"learning_rate": 5.545554907101636e-07,
"loss": 0.5781,
"step": 13467
},
{
"epoch": 0.85,
"grad_norm": 0.8181560039520264,
"learning_rate": 5.540859515412378e-07,
"loss": 0.5483,
"step": 13468
},
{
"epoch": 0.85,
"grad_norm": 0.8753595352172852,
"learning_rate": 5.536165995742882e-07,
"loss": 0.5315,
"step": 13469
},
{
"epoch": 0.85,
"grad_norm": 0.8592386841773987,
"learning_rate": 5.531474348290733e-07,
"loss": 0.5426,
"step": 13470
},
{
"epoch": 0.85,
"grad_norm": 0.8794154524803162,
"learning_rate": 5.526784573253525e-07,
"loss": 0.5856,
"step": 13471
},
{
"epoch": 0.85,
"grad_norm": 0.9070557951927185,
"learning_rate": 5.522096670828703e-07,
"loss": 0.5833,
"step": 13472
},
{
"epoch": 0.85,
"grad_norm": 0.8681169152259827,
"learning_rate": 5.517410641213656e-07,
"loss": 0.5704,
"step": 13473
},
{
"epoch": 0.85,
"grad_norm": 0.8716253042221069,
"learning_rate": 5.512726484605707e-07,
"loss": 0.557,
"step": 13474
},
{
"epoch": 0.85,
"grad_norm": 0.8904623985290527,
"learning_rate": 5.508044201202084e-07,
"loss": 0.5565,
"step": 13475
},
{
"epoch": 0.85,
"grad_norm": 0.842241644859314,
"learning_rate": 5.503363791199945e-07,
"loss": 0.526,
"step": 13476
},
{
"epoch": 0.85,
"grad_norm": 0.8667955994606018,
"learning_rate": 5.49868525479637e-07,
"loss": 0.6341,
"step": 13477
},
{
"epoch": 0.85,
"grad_norm": 0.8526463508605957,
"learning_rate": 5.494008592188344e-07,
"loss": 0.527,
"step": 13478
},
{
"epoch": 0.85,
"grad_norm": 0.8465002775192261,
"learning_rate": 5.489333803572788e-07,
"loss": 0.5513,
"step": 13479
},
{
"epoch": 0.85,
"grad_norm": 0.802689254283905,
"learning_rate": 5.484660889146548e-07,
"loss": 0.5247,
"step": 13480
},
{
"epoch": 0.85,
"grad_norm": 0.9201193451881409,
"learning_rate": 5.479989849106381e-07,
"loss": 0.5893,
"step": 13481
},
{
"epoch": 0.85,
"grad_norm": 0.8439991474151611,
"learning_rate": 5.475320683648977e-07,
"loss": 0.5606,
"step": 13482
},
{
"epoch": 0.85,
"grad_norm": 0.8193072080612183,
"learning_rate": 5.470653392970904e-07,
"loss": 0.5555,
"step": 13483
},
{
"epoch": 0.85,
"grad_norm": 0.876397967338562,
"learning_rate": 5.465987977268727e-07,
"loss": 0.5745,
"step": 13484
},
{
"epoch": 0.85,
"grad_norm": 0.9382455348968506,
"learning_rate": 5.46132443673888e-07,
"loss": 0.6023,
"step": 13485
},
{
"epoch": 0.85,
"grad_norm": 0.9134024977684021,
"learning_rate": 5.456662771577714e-07,
"loss": 0.56,
"step": 13486
},
{
"epoch": 0.85,
"grad_norm": 0.9391716718673706,
"learning_rate": 5.452002981981519e-07,
"loss": 0.6151,
"step": 13487
},
{
"epoch": 0.85,
"grad_norm": 0.9424962401390076,
"learning_rate": 5.447345068146515e-07,
"loss": 0.6357,
"step": 13488
},
{
"epoch": 0.85,
"grad_norm": 0.8668440580368042,
"learning_rate": 5.442689030268816e-07,
"loss": 0.5539,
"step": 13489
},
{
"epoch": 0.85,
"grad_norm": 0.8978198170661926,
"learning_rate": 5.438034868544495e-07,
"loss": 0.6061,
"step": 13490
},
{
"epoch": 0.85,
"grad_norm": 0.8872178196907043,
"learning_rate": 5.433382583169478e-07,
"loss": 0.5772,
"step": 13491
},
{
"epoch": 0.85,
"grad_norm": 0.9086841344833374,
"learning_rate": 5.428732174339702e-07,
"loss": 0.584,
"step": 13492
},
{
"epoch": 0.85,
"grad_norm": 0.8926877975463867,
"learning_rate": 5.424083642250966e-07,
"loss": 0.5608,
"step": 13493
},
{
"epoch": 0.85,
"grad_norm": 0.9636724591255188,
"learning_rate": 5.419436987098991e-07,
"loss": 0.6392,
"step": 13494
},
{
"epoch": 0.85,
"grad_norm": 0.8971894383430481,
"learning_rate": 5.414792209079445e-07,
"loss": 0.5755,
"step": 13495
},
{
"epoch": 0.86,
"grad_norm": 0.8618263006210327,
"learning_rate": 5.410149308387891e-07,
"loss": 0.6396,
"step": 13496
},
{
"epoch": 0.86,
"grad_norm": 0.8926728963851929,
"learning_rate": 5.405508285219835e-07,
"loss": 0.5824,
"step": 13497
},
{
"epoch": 0.86,
"grad_norm": 0.8346815705299377,
"learning_rate": 5.400869139770704e-07,
"loss": 0.5066,
"step": 13498
},
{
"epoch": 0.86,
"grad_norm": 0.9091081023216248,
"learning_rate": 5.396231872235819e-07,
"loss": 0.5603,
"step": 13499
},
{
"epoch": 0.86,
"grad_norm": 0.9015220403671265,
"learning_rate": 5.391596482810424e-07,
"loss": 0.5675,
"step": 13500
},
{
"epoch": 0.86,
"grad_norm": 0.9150410294532776,
"learning_rate": 5.386962971689746e-07,
"loss": 0.5732,
"step": 13501
},
{
"epoch": 0.86,
"grad_norm": 0.8379479050636292,
"learning_rate": 5.382331339068853e-07,
"loss": 0.5747,
"step": 13502
},
{
"epoch": 0.86,
"grad_norm": 0.8525556325912476,
"learning_rate": 5.377701585142769e-07,
"loss": 0.5536,
"step": 13503
},
{
"epoch": 0.86,
"grad_norm": 0.9342008233070374,
"learning_rate": 5.373073710106441e-07,
"loss": 0.6113,
"step": 13504
},
{
"epoch": 0.86,
"grad_norm": 0.9186147451400757,
"learning_rate": 5.368447714154734e-07,
"loss": 0.5781,
"step": 13505
},
{
"epoch": 0.86,
"grad_norm": 0.8697748780250549,
"learning_rate": 5.363823597482443e-07,
"loss": 0.5869,
"step": 13506
},
{
"epoch": 0.86,
"grad_norm": 0.8578813076019287,
"learning_rate": 5.359201360284255e-07,
"loss": 0.5598,
"step": 13507
},
{
"epoch": 0.86,
"grad_norm": 0.9658546447753906,
"learning_rate": 5.354581002754799e-07,
"loss": 0.5963,
"step": 13508
},
{
"epoch": 0.86,
"grad_norm": 0.8002378344535828,
"learning_rate": 5.349962525088631e-07,
"loss": 0.5307,
"step": 13509
},
{
"epoch": 0.86,
"grad_norm": 0.8562396764755249,
"learning_rate": 5.345345927480211e-07,
"loss": 0.5668,
"step": 13510
},
{
"epoch": 0.86,
"grad_norm": 0.8851287961006165,
"learning_rate": 5.340731210123934e-07,
"loss": 0.5659,
"step": 13511
},
{
"epoch": 0.86,
"grad_norm": 0.9325246214866638,
"learning_rate": 5.336118373214116e-07,
"loss": 0.6068,
"step": 13512
},
{
"epoch": 0.86,
"grad_norm": 0.9005350470542908,
"learning_rate": 5.331507416944965e-07,
"loss": 0.5734,
"step": 13513
},
{
"epoch": 0.86,
"grad_norm": 0.915073573589325,
"learning_rate": 5.326898341510655e-07,
"loss": 0.6254,
"step": 13514
},
{
"epoch": 0.86,
"grad_norm": 0.8757150173187256,
"learning_rate": 5.322291147105246e-07,
"loss": 0.5644,
"step": 13515
},
{
"epoch": 0.86,
"grad_norm": 0.8947983384132385,
"learning_rate": 5.317685833922737e-07,
"loss": 0.6423,
"step": 13516
},
{
"epoch": 0.86,
"grad_norm": 0.941947877407074,
"learning_rate": 5.313082402157039e-07,
"loss": 0.5757,
"step": 13517
},
{
"epoch": 0.86,
"grad_norm": 0.8160790801048279,
"learning_rate": 5.308480852001979e-07,
"loss": 0.5549,
"step": 13518
},
{
"epoch": 0.86,
"grad_norm": 0.8949527740478516,
"learning_rate": 5.303881183651327e-07,
"loss": 0.5,
"step": 13519
},
{
"epoch": 0.86,
"grad_norm": 0.9550206065177917,
"learning_rate": 5.29928339729876e-07,
"loss": 0.6233,
"step": 13520
},
{
"epoch": 0.86,
"grad_norm": 0.8570389747619629,
"learning_rate": 5.294687493137845e-07,
"loss": 0.5913,
"step": 13521
},
{
"epoch": 0.86,
"grad_norm": 0.8469735980033875,
"learning_rate": 5.290093471362145e-07,
"loss": 0.5931,
"step": 13522
},
{
"epoch": 0.86,
"grad_norm": 0.8493378162384033,
"learning_rate": 5.28550133216506e-07,
"loss": 0.608,
"step": 13523
},
{
"epoch": 0.86,
"grad_norm": 0.8677387237548828,
"learning_rate": 5.28091107573997e-07,
"loss": 0.5272,
"step": 13524
},
{
"epoch": 0.86,
"grad_norm": 0.8398542404174805,
"learning_rate": 5.27632270228014e-07,
"loss": 0.5557,
"step": 13525
},
{
"epoch": 0.86,
"grad_norm": 0.9466037154197693,
"learning_rate": 5.271736211978784e-07,
"loss": 0.5509,
"step": 13526
},
{
"epoch": 0.86,
"grad_norm": 0.9450697302818298,
"learning_rate": 5.267151605029014e-07,
"loss": 0.5604,
"step": 13527
},
{
"epoch": 0.86,
"grad_norm": 0.8523156046867371,
"learning_rate": 5.262568881623892e-07,
"loss": 0.5634,
"step": 13528
},
{
"epoch": 0.86,
"grad_norm": 0.8883264660835266,
"learning_rate": 5.257988041956347e-07,
"loss": 0.543,
"step": 13529
},
{
"epoch": 0.86,
"grad_norm": 0.9195562601089478,
"learning_rate": 5.253409086219274e-07,
"loss": 0.6409,
"step": 13530
},
{
"epoch": 0.86,
"grad_norm": 0.8769651055335999,
"learning_rate": 5.248832014605503e-07,
"loss": 0.542,
"step": 13531
},
{
"epoch": 0.86,
"grad_norm": 0.9406867623329163,
"learning_rate": 5.244256827307726e-07,
"loss": 0.6345,
"step": 13532
},
{
"epoch": 0.86,
"grad_norm": 0.811181366443634,
"learning_rate": 5.239683524518596e-07,
"loss": 0.5724,
"step": 13533
},
{
"epoch": 0.86,
"grad_norm": 0.9315853714942932,
"learning_rate": 5.23511210643069e-07,
"loss": 0.5907,
"step": 13534
},
{
"epoch": 0.86,
"grad_norm": 0.852668821811676,
"learning_rate": 5.230542573236485e-07,
"loss": 0.5694,
"step": 13535
},
{
"epoch": 0.86,
"grad_norm": 0.8345797657966614,
"learning_rate": 5.225974925128402e-07,
"loss": 0.5513,
"step": 13536
},
{
"epoch": 0.86,
"grad_norm": 0.9633619785308838,
"learning_rate": 5.221409162298741e-07,
"loss": 0.607,
"step": 13537
},
{
"epoch": 0.86,
"grad_norm": 0.8628314733505249,
"learning_rate": 5.216845284939764e-07,
"loss": 0.5985,
"step": 13538
},
{
"epoch": 0.86,
"grad_norm": 0.8650707602500916,
"learning_rate": 5.212283293243658e-07,
"loss": 0.5438,
"step": 13539
},
{
"epoch": 0.86,
"grad_norm": 0.8653766512870789,
"learning_rate": 5.207723187402491e-07,
"loss": 0.521,
"step": 13540
},
{
"epoch": 0.86,
"grad_norm": 0.9244462847709656,
"learning_rate": 5.203164967608282e-07,
"loss": 0.6332,
"step": 13541
},
{
"epoch": 0.86,
"grad_norm": 0.9744123816490173,
"learning_rate": 5.198608634052965e-07,
"loss": 0.6545,
"step": 13542
},
{
"epoch": 0.86,
"grad_norm": 0.8849944472312927,
"learning_rate": 5.194054186928365e-07,
"loss": 0.5575,
"step": 13543
},
{
"epoch": 0.86,
"grad_norm": 0.8665662407875061,
"learning_rate": 5.189501626426297e-07,
"loss": 0.5634,
"step": 13544
},
{
"epoch": 0.86,
"grad_norm": 0.9209324717521667,
"learning_rate": 5.184950952738421e-07,
"loss": 0.6306,
"step": 13545
},
{
"epoch": 0.86,
"grad_norm": 0.86234050989151,
"learning_rate": 5.180402166056359e-07,
"loss": 0.5072,
"step": 13546
},
{
"epoch": 0.86,
"grad_norm": 0.9226478338241577,
"learning_rate": 5.175855266571644e-07,
"loss": 0.6141,
"step": 13547
},
{
"epoch": 0.86,
"grad_norm": 0.9241039752960205,
"learning_rate": 5.171310254475737e-07,
"loss": 0.5793,
"step": 13548
},
{
"epoch": 0.86,
"grad_norm": 0.8989474177360535,
"learning_rate": 5.166767129960004e-07,
"loss": 0.5708,
"step": 13549
},
{
"epoch": 0.86,
"grad_norm": 0.8441492915153503,
"learning_rate": 5.162225893215755e-07,
"loss": 0.5564,
"step": 13550
},
{
"epoch": 0.86,
"grad_norm": 0.8294525742530823,
"learning_rate": 5.157686544434176e-07,
"loss": 0.553,
"step": 13551
},
{
"epoch": 0.86,
"grad_norm": 0.8823322057723999,
"learning_rate": 5.153149083806436e-07,
"loss": 0.5434,
"step": 13552
},
{
"epoch": 0.86,
"grad_norm": 0.9079649448394775,
"learning_rate": 5.14861351152357e-07,
"loss": 0.6156,
"step": 13553
},
{
"epoch": 0.86,
"grad_norm": 0.8697636723518372,
"learning_rate": 5.144079827776566e-07,
"loss": 0.5319,
"step": 13554
},
{
"epoch": 0.86,
"grad_norm": 0.8235500454902649,
"learning_rate": 5.139548032756325e-07,
"loss": 0.5539,
"step": 13555
},
{
"epoch": 0.86,
"grad_norm": 0.8555493354797363,
"learning_rate": 5.13501812665364e-07,
"loss": 0.5011,
"step": 13556
},
{
"epoch": 0.86,
"grad_norm": 0.8816463351249695,
"learning_rate": 5.130490109659275e-07,
"loss": 0.5324,
"step": 13557
},
{
"epoch": 0.86,
"grad_norm": 0.8975476622581482,
"learning_rate": 5.125963981963894e-07,
"loss": 0.5839,
"step": 13558
},
{
"epoch": 0.86,
"grad_norm": 0.8355741500854492,
"learning_rate": 5.12143974375805e-07,
"loss": 0.5476,
"step": 13559
},
{
"epoch": 0.86,
"grad_norm": 0.8712900876998901,
"learning_rate": 5.116917395232262e-07,
"loss": 0.6212,
"step": 13560
},
{
"epoch": 0.86,
"grad_norm": 0.8691787123680115,
"learning_rate": 5.112396936576947e-07,
"loss": 0.5257,
"step": 13561
},
{
"epoch": 0.86,
"grad_norm": 0.860202968120575,
"learning_rate": 5.107878367982438e-07,
"loss": 0.5328,
"step": 13562
},
{
"epoch": 0.86,
"grad_norm": 0.9462293386459351,
"learning_rate": 5.103361689639019e-07,
"loss": 0.6081,
"step": 13563
},
{
"epoch": 0.86,
"grad_norm": 0.9243309497833252,
"learning_rate": 5.098846901736832e-07,
"loss": 0.5952,
"step": 13564
},
{
"epoch": 0.86,
"grad_norm": 0.8597437739372253,
"learning_rate": 5.094334004466012e-07,
"loss": 0.6039,
"step": 13565
},
{
"epoch": 0.86,
"grad_norm": 0.9059598445892334,
"learning_rate": 5.089822998016586e-07,
"loss": 0.5546,
"step": 13566
},
{
"epoch": 0.86,
"grad_norm": 0.9531145691871643,
"learning_rate": 5.085313882578469e-07,
"loss": 0.5446,
"step": 13567
},
{
"epoch": 0.86,
"grad_norm": 0.923179566860199,
"learning_rate": 5.080806658341536e-07,
"loss": 0.5803,
"step": 13568
},
{
"epoch": 0.86,
"grad_norm": 0.9919398427009583,
"learning_rate": 5.076301325495575e-07,
"loss": 0.6093,
"step": 13569
},
{
"epoch": 0.86,
"grad_norm": 0.8127473592758179,
"learning_rate": 5.071797884230284e-07,
"loss": 0.528,
"step": 13570
},
{
"epoch": 0.86,
"grad_norm": 0.9124990701675415,
"learning_rate": 5.067296334735306e-07,
"loss": 0.6227,
"step": 13571
},
{
"epoch": 0.86,
"grad_norm": 0.9629392623901367,
"learning_rate": 5.062796677200154e-07,
"loss": 0.613,
"step": 13572
},
{
"epoch": 0.86,
"grad_norm": 0.921553373336792,
"learning_rate": 5.058298911814302e-07,
"loss": 0.5961,
"step": 13573
},
{
"epoch": 0.86,
"grad_norm": 0.9422236680984497,
"learning_rate": 5.053803038767158e-07,
"loss": 0.5932,
"step": 13574
},
{
"epoch": 0.86,
"grad_norm": 0.8603041172027588,
"learning_rate": 5.049309058248004e-07,
"loss": 0.528,
"step": 13575
},
{
"epoch": 0.86,
"grad_norm": 0.8307815790176392,
"learning_rate": 5.044816970446076e-07,
"loss": 0.5176,
"step": 13576
},
{
"epoch": 0.86,
"grad_norm": 0.8835110068321228,
"learning_rate": 5.040326775550514e-07,
"loss": 0.5863,
"step": 13577
},
{
"epoch": 0.86,
"grad_norm": 0.9652464985847473,
"learning_rate": 5.035838473750393e-07,
"loss": 0.5984,
"step": 13578
},
{
"epoch": 0.86,
"grad_norm": 0.8423542380332947,
"learning_rate": 5.031352065234702e-07,
"loss": 0.5387,
"step": 13579
},
{
"epoch": 0.86,
"grad_norm": 0.8693512678146362,
"learning_rate": 5.026867550192327e-07,
"loss": 0.5339,
"step": 13580
},
{
"epoch": 0.86,
"grad_norm": 0.9612827301025391,
"learning_rate": 5.022384928812107e-07,
"loss": 0.5946,
"step": 13581
},
{
"epoch": 0.86,
"grad_norm": 0.8645419478416443,
"learning_rate": 5.017904201282808e-07,
"loss": 0.5286,
"step": 13582
},
{
"epoch": 0.86,
"grad_norm": 0.875821053981781,
"learning_rate": 5.013425367793074e-07,
"loss": 0.5546,
"step": 13583
},
{
"epoch": 0.86,
"grad_norm": 0.8827986121177673,
"learning_rate": 5.008948428531496e-07,
"loss": 0.5512,
"step": 13584
},
{
"epoch": 0.86,
"grad_norm": 0.8592386245727539,
"learning_rate": 5.004473383686592e-07,
"loss": 0.5975,
"step": 13585
},
{
"epoch": 0.86,
"grad_norm": 0.9252444505691528,
"learning_rate": 5.000000233446783e-07,
"loss": 0.5423,
"step": 13586
},
{
"epoch": 0.86,
"grad_norm": 0.893185019493103,
"learning_rate": 4.99552897800043e-07,
"loss": 0.5593,
"step": 13587
},
{
"epoch": 0.86,
"grad_norm": 0.8774006366729736,
"learning_rate": 4.991059617535781e-07,
"loss": 0.5636,
"step": 13588
},
{
"epoch": 0.86,
"grad_norm": 0.8796536922454834,
"learning_rate": 4.986592152241043e-07,
"loss": 0.6007,
"step": 13589
},
{
"epoch": 0.86,
"grad_norm": 0.8507401943206787,
"learning_rate": 4.982126582304314e-07,
"loss": 0.5618,
"step": 13590
},
{
"epoch": 0.86,
"grad_norm": 0.9354941248893738,
"learning_rate": 4.977662907913633e-07,
"loss": 0.6212,
"step": 13591
},
{
"epoch": 0.86,
"grad_norm": 0.8648061156272888,
"learning_rate": 4.973201129256943e-07,
"loss": 0.5562,
"step": 13592
},
{
"epoch": 0.86,
"grad_norm": 0.9033337831497192,
"learning_rate": 4.968741246522129e-07,
"loss": 0.6071,
"step": 13593
},
{
"epoch": 0.86,
"grad_norm": 0.9331035017967224,
"learning_rate": 4.964283259896945e-07,
"loss": 0.568,
"step": 13594
},
{
"epoch": 0.86,
"grad_norm": 0.903471052646637,
"learning_rate": 4.959827169569136e-07,
"loss": 0.542,
"step": 13595
},
{
"epoch": 0.86,
"grad_norm": 0.8744809627532959,
"learning_rate": 4.955372975726336e-07,
"loss": 0.5319,
"step": 13596
},
{
"epoch": 0.86,
"grad_norm": 0.9031259417533875,
"learning_rate": 4.950920678556065e-07,
"loss": 0.5862,
"step": 13597
},
{
"epoch": 0.86,
"grad_norm": 0.836344838142395,
"learning_rate": 4.946470278245813e-07,
"loss": 0.5504,
"step": 13598
},
{
"epoch": 0.86,
"grad_norm": 0.8633370995521545,
"learning_rate": 4.942021774982969e-07,
"loss": 0.5772,
"step": 13599
},
{
"epoch": 0.86,
"grad_norm": 0.8724879622459412,
"learning_rate": 4.937575168954845e-07,
"loss": 0.5347,
"step": 13600
},
{
"epoch": 0.86,
"grad_norm": 0.9412771463394165,
"learning_rate": 4.933130460348673e-07,
"loss": 0.5512,
"step": 13601
},
{
"epoch": 0.86,
"grad_norm": 0.9978772401809692,
"learning_rate": 4.928687649351594e-07,
"loss": 0.5511,
"step": 13602
},
{
"epoch": 0.86,
"grad_norm": 0.8989056348800659,
"learning_rate": 4.924246736150679e-07,
"loss": 0.5492,
"step": 13603
},
{
"epoch": 0.86,
"grad_norm": 0.9470418095588684,
"learning_rate": 4.919807720932946e-07,
"loss": 0.5756,
"step": 13604
},
{
"epoch": 0.86,
"grad_norm": 0.8301222324371338,
"learning_rate": 4.915370603885272e-07,
"loss": 0.5398,
"step": 13605
},
{
"epoch": 0.86,
"grad_norm": 0.8426318764686584,
"learning_rate": 4.91093538519451e-07,
"loss": 0.5703,
"step": 13606
},
{
"epoch": 0.86,
"grad_norm": 0.8601441383361816,
"learning_rate": 4.906502065047403e-07,
"loss": 0.5795,
"step": 13607
},
{
"epoch": 0.86,
"grad_norm": 0.8154615163803101,
"learning_rate": 4.902070643630624e-07,
"loss": 0.5552,
"step": 13608
},
{
"epoch": 0.86,
"grad_norm": 0.8969496488571167,
"learning_rate": 4.89764112113078e-07,
"loss": 0.5814,
"step": 13609
},
{
"epoch": 0.86,
"grad_norm": 0.9093883633613586,
"learning_rate": 4.893213497734356e-07,
"loss": 0.5667,
"step": 13610
},
{
"epoch": 0.86,
"grad_norm": 0.8988984227180481,
"learning_rate": 4.888787773627785e-07,
"loss": 0.5766,
"step": 13611
},
{
"epoch": 0.86,
"grad_norm": 0.8481857180595398,
"learning_rate": 4.884363948997455e-07,
"loss": 0.5216,
"step": 13612
},
{
"epoch": 0.86,
"grad_norm": 0.8880239725112915,
"learning_rate": 4.879942024029599e-07,
"loss": 0.5504,
"step": 13613
},
{
"epoch": 0.86,
"grad_norm": 0.8837846517562866,
"learning_rate": 4.875521998910426e-07,
"loss": 0.5126,
"step": 13614
},
{
"epoch": 0.86,
"grad_norm": 0.905758798122406,
"learning_rate": 4.871103873826044e-07,
"loss": 0.5803,
"step": 13615
},
{
"epoch": 0.86,
"grad_norm": 0.8283089995384216,
"learning_rate": 4.866687648962487e-07,
"loss": 0.6043,
"step": 13616
},
{
"epoch": 0.86,
"grad_norm": 0.9035173654556274,
"learning_rate": 4.862273324505712e-07,
"loss": 0.5171,
"step": 13617
},
{
"epoch": 0.86,
"grad_norm": 0.8479889631271362,
"learning_rate": 4.857860900641576e-07,
"loss": 0.5226,
"step": 13618
},
{
"epoch": 0.86,
"grad_norm": 0.8784950375556946,
"learning_rate": 4.853450377555879e-07,
"loss": 0.5695,
"step": 13619
},
{
"epoch": 0.86,
"grad_norm": 0.8352934122085571,
"learning_rate": 4.849041755434336e-07,
"loss": 0.548,
"step": 13620
},
{
"epoch": 0.86,
"grad_norm": 0.8737031817436218,
"learning_rate": 4.844635034462574e-07,
"loss": 0.503,
"step": 13621
},
{
"epoch": 0.86,
"grad_norm": 0.8494743704795837,
"learning_rate": 4.840230214826147e-07,
"loss": 0.6044,
"step": 13622
},
{
"epoch": 0.86,
"grad_norm": 0.866535484790802,
"learning_rate": 4.835827296710537e-07,
"loss": 0.625,
"step": 13623
},
{
"epoch": 0.86,
"grad_norm": 0.9683859944343567,
"learning_rate": 4.831426280301105e-07,
"loss": 0.6063,
"step": 13624
},
{
"epoch": 0.86,
"grad_norm": 0.8437833189964294,
"learning_rate": 4.8270271657832e-07,
"loss": 0.4983,
"step": 13625
},
{
"epoch": 0.86,
"grad_norm": 0.9570308327674866,
"learning_rate": 4.822629953342028e-07,
"loss": 0.5752,
"step": 13626
},
{
"epoch": 0.86,
"grad_norm": 0.8903212547302246,
"learning_rate": 4.81823464316275e-07,
"loss": 0.558,
"step": 13627
},
{
"epoch": 0.86,
"grad_norm": 0.916301429271698,
"learning_rate": 4.813841235430433e-07,
"loss": 0.5274,
"step": 13628
},
{
"epoch": 0.86,
"grad_norm": 0.863028883934021,
"learning_rate": 4.809449730330068e-07,
"loss": 0.544,
"step": 13629
},
{
"epoch": 0.86,
"grad_norm": 0.9283245205879211,
"learning_rate": 4.805060128046574e-07,
"loss": 0.5725,
"step": 13630
},
{
"epoch": 0.86,
"grad_norm": 0.8969873189926147,
"learning_rate": 4.80067242876478e-07,
"loss": 0.5858,
"step": 13631
},
{
"epoch": 0.86,
"grad_norm": 0.9229633808135986,
"learning_rate": 4.796286632669417e-07,
"loss": 0.5792,
"step": 13632
},
{
"epoch": 0.86,
"grad_norm": 0.9000493288040161,
"learning_rate": 4.791902739945187e-07,
"loss": 0.5484,
"step": 13633
},
{
"epoch": 0.86,
"grad_norm": 0.8788484930992126,
"learning_rate": 4.787520750776658e-07,
"loss": 0.5745,
"step": 13634
},
{
"epoch": 0.86,
"grad_norm": 0.8873356580734253,
"learning_rate": 4.783140665348352e-07,
"loss": 0.509,
"step": 13635
},
{
"epoch": 0.86,
"grad_norm": 0.8811357021331787,
"learning_rate": 4.778762483844701e-07,
"loss": 0.5573,
"step": 13636
},
{
"epoch": 0.86,
"grad_norm": 0.8424716591835022,
"learning_rate": 4.774386206450027e-07,
"loss": 0.5391,
"step": 13637
},
{
"epoch": 0.86,
"grad_norm": 0.8560691475868225,
"learning_rate": 4.770011833348631e-07,
"loss": 0.5524,
"step": 13638
},
{
"epoch": 0.86,
"grad_norm": 0.8537570238113403,
"learning_rate": 4.7656393647247054e-07,
"loss": 0.5459,
"step": 13639
},
{
"epoch": 0.86,
"grad_norm": 0.8615885972976685,
"learning_rate": 4.7612688007623363e-07,
"loss": 0.5397,
"step": 13640
},
{
"epoch": 0.86,
"grad_norm": 0.8361106514930725,
"learning_rate": 4.756900141645565e-07,
"loss": 0.6052,
"step": 13641
},
{
"epoch": 0.86,
"grad_norm": 0.8686729669570923,
"learning_rate": 4.752533387558339e-07,
"loss": 0.5867,
"step": 13642
},
{
"epoch": 0.86,
"grad_norm": 0.8469032645225525,
"learning_rate": 4.748168538684528e-07,
"loss": 0.5686,
"step": 13643
},
{
"epoch": 0.86,
"grad_norm": 0.9301448464393616,
"learning_rate": 4.7438055952079287e-07,
"loss": 0.6143,
"step": 13644
},
{
"epoch": 0.86,
"grad_norm": 0.9038071036338806,
"learning_rate": 4.739444557312223e-07,
"loss": 0.5559,
"step": 13645
},
{
"epoch": 0.86,
"grad_norm": 0.8837379813194275,
"learning_rate": 4.735085425181063e-07,
"loss": 0.5633,
"step": 13646
},
{
"epoch": 0.86,
"grad_norm": 0.884790301322937,
"learning_rate": 4.730728198998008e-07,
"loss": 0.6254,
"step": 13647
},
{
"epoch": 0.86,
"grad_norm": 0.918059766292572,
"learning_rate": 4.726372878946489e-07,
"loss": 0.5338,
"step": 13648
},
{
"epoch": 0.86,
"grad_norm": 0.8778460621833801,
"learning_rate": 4.7220194652099204e-07,
"loss": 0.5891,
"step": 13649
},
{
"epoch": 0.86,
"grad_norm": 0.9263706207275391,
"learning_rate": 4.7176679579716e-07,
"loss": 0.5847,
"step": 13650
},
{
"epoch": 0.86,
"grad_norm": 0.9742307066917419,
"learning_rate": 4.7133183574147534e-07,
"loss": 0.6366,
"step": 13651
},
{
"epoch": 0.86,
"grad_norm": 0.9247993230819702,
"learning_rate": 4.7089706637225283e-07,
"loss": 0.6062,
"step": 13652
},
{
"epoch": 0.86,
"grad_norm": 0.8701785802841187,
"learning_rate": 4.7046248770780065e-07,
"loss": 0.5936,
"step": 13653
},
{
"epoch": 0.87,
"grad_norm": 0.8334656953811646,
"learning_rate": 4.7002809976641417e-07,
"loss": 0.5348,
"step": 13654
},
{
"epoch": 0.87,
"grad_norm": 0.9703954458236694,
"learning_rate": 4.6959390256638703e-07,
"loss": 0.5806,
"step": 13655
},
{
"epoch": 0.87,
"grad_norm": 0.9238660335540771,
"learning_rate": 4.691598961260002e-07,
"loss": 0.5771,
"step": 13656
},
{
"epoch": 0.87,
"grad_norm": 0.9725003838539124,
"learning_rate": 4.68726080463528e-07,
"loss": 0.6504,
"step": 13657
},
{
"epoch": 0.87,
"grad_norm": 0.8711181879043579,
"learning_rate": 4.682924555972379e-07,
"loss": 0.5642,
"step": 13658
},
{
"epoch": 0.87,
"grad_norm": 0.8927187919616699,
"learning_rate": 4.6785902154538763e-07,
"loss": 0.6176,
"step": 13659
},
{
"epoch": 0.87,
"grad_norm": 0.9735706448554993,
"learning_rate": 4.674257783262276e-07,
"loss": 0.6128,
"step": 13660
},
{
"epoch": 0.87,
"grad_norm": 0.9042197465896606,
"learning_rate": 4.669927259580015e-07,
"loss": 0.5782,
"step": 13661
},
{
"epoch": 0.87,
"grad_norm": 0.8241575956344604,
"learning_rate": 4.665598644589409e-07,
"loss": 0.4985,
"step": 13662
},
{
"epoch": 0.87,
"grad_norm": 0.8879325985908508,
"learning_rate": 4.6612719384727556e-07,
"loss": 0.5778,
"step": 13663
},
{
"epoch": 0.87,
"grad_norm": 0.9175477027893066,
"learning_rate": 4.656947141412205e-07,
"loss": 0.5892,
"step": 13664
},
{
"epoch": 0.87,
"grad_norm": 0.8603050112724304,
"learning_rate": 4.652624253589877e-07,
"loss": 0.6211,
"step": 13665
},
{
"epoch": 0.87,
"grad_norm": 0.9173632860183716,
"learning_rate": 4.6483032751877987e-07,
"loss": 0.6,
"step": 13666
},
{
"epoch": 0.87,
"grad_norm": 0.9489515423774719,
"learning_rate": 4.6439842063878803e-07,
"loss": 0.5417,
"step": 13667
},
{
"epoch": 0.87,
"grad_norm": 0.8993018865585327,
"learning_rate": 4.639667047372015e-07,
"loss": 0.5831,
"step": 13668
},
{
"epoch": 0.87,
"grad_norm": 0.8332312107086182,
"learning_rate": 4.6353517983219856e-07,
"loss": 0.5133,
"step": 13669
},
{
"epoch": 0.87,
"grad_norm": 0.8579809069633484,
"learning_rate": 4.631038459419468e-07,
"loss": 0.5473,
"step": 13670
},
{
"epoch": 0.87,
"grad_norm": 0.9015935659408569,
"learning_rate": 4.6267270308460955e-07,
"loss": 0.5857,
"step": 13671
},
{
"epoch": 0.87,
"grad_norm": 0.8048023581504822,
"learning_rate": 4.6224175127834057e-07,
"loss": 0.5637,
"step": 13672
},
{
"epoch": 0.87,
"grad_norm": 0.8290963172912598,
"learning_rate": 4.61810990541286e-07,
"loss": 0.5104,
"step": 13673
},
{
"epoch": 0.87,
"grad_norm": 0.8655577301979065,
"learning_rate": 4.61380420891584e-07,
"loss": 0.5858,
"step": 13674
},
{
"epoch": 0.87,
"grad_norm": 0.9205370545387268,
"learning_rate": 4.6095004234736175e-07,
"loss": 0.6064,
"step": 13675
},
{
"epoch": 0.87,
"grad_norm": 0.8846642374992371,
"learning_rate": 4.6051985492674425e-07,
"loss": 0.5652,
"step": 13676
},
{
"epoch": 0.87,
"grad_norm": 0.8984456062316895,
"learning_rate": 4.6008985864784473e-07,
"loss": 0.578,
"step": 13677
},
{
"epoch": 0.87,
"grad_norm": 0.9386430382728577,
"learning_rate": 4.596600535287671e-07,
"loss": 0.6143,
"step": 13678
},
{
"epoch": 0.87,
"grad_norm": 0.9357401132583618,
"learning_rate": 4.592304395876102e-07,
"loss": 0.5837,
"step": 13679
},
{
"epoch": 0.87,
"grad_norm": 0.9388497471809387,
"learning_rate": 4.588010168424628e-07,
"loss": 0.5809,
"step": 13680
},
{
"epoch": 0.87,
"grad_norm": 0.9037414193153381,
"learning_rate": 4.5837178531140723e-07,
"loss": 0.5671,
"step": 13681
},
{
"epoch": 0.87,
"grad_norm": 0.9199149012565613,
"learning_rate": 4.579427450125179e-07,
"loss": 0.5612,
"step": 13682
},
{
"epoch": 0.87,
"grad_norm": 0.8453497886657715,
"learning_rate": 4.5751389596385755e-07,
"loss": 0.5223,
"step": 13683
},
{
"epoch": 0.87,
"grad_norm": 0.9701248407363892,
"learning_rate": 4.570852381834839e-07,
"loss": 0.556,
"step": 13684
},
{
"epoch": 0.87,
"grad_norm": 0.8935304284095764,
"learning_rate": 4.5665677168944935e-07,
"loss": 0.6301,
"step": 13685
},
{
"epoch": 0.87,
"grad_norm": 0.8989062905311584,
"learning_rate": 4.562284964997915e-07,
"loss": 0.5626,
"step": 13686
},
{
"epoch": 0.87,
"grad_norm": 0.9354601502418518,
"learning_rate": 4.5580041263254547e-07,
"loss": 0.6159,
"step": 13687
},
{
"epoch": 0.87,
"grad_norm": 0.8538670539855957,
"learning_rate": 4.553725201057363e-07,
"loss": 0.5656,
"step": 13688
},
{
"epoch": 0.87,
"grad_norm": 0.9553387761116028,
"learning_rate": 4.5494481893738005e-07,
"loss": 0.5389,
"step": 13689
},
{
"epoch": 0.87,
"grad_norm": 1.0032283067703247,
"learning_rate": 4.5451730914548744e-07,
"loss": 0.6298,
"step": 13690
},
{
"epoch": 0.87,
"grad_norm": 0.8711049556732178,
"learning_rate": 4.540899907480578e-07,
"loss": 0.5762,
"step": 13691
},
{
"epoch": 0.87,
"grad_norm": 0.8655171990394592,
"learning_rate": 4.536628637630836e-07,
"loss": 0.552,
"step": 13692
},
{
"epoch": 0.87,
"grad_norm": 0.8877602815628052,
"learning_rate": 4.532359282085519e-07,
"loss": 0.6132,
"step": 13693
},
{
"epoch": 0.87,
"grad_norm": 0.8526985049247742,
"learning_rate": 4.528091841024379e-07,
"loss": 0.5487,
"step": 13694
},
{
"epoch": 0.87,
"grad_norm": 0.8731285333633423,
"learning_rate": 4.5238263146271053e-07,
"loss": 0.5844,
"step": 13695
},
{
"epoch": 0.87,
"grad_norm": 0.9351499676704407,
"learning_rate": 4.5195627030733156e-07,
"loss": 0.5862,
"step": 13696
},
{
"epoch": 0.87,
"grad_norm": 0.8490439653396606,
"learning_rate": 4.5153010065425054e-07,
"loss": 0.5402,
"step": 13697
},
{
"epoch": 0.87,
"grad_norm": 0.8659386038780212,
"learning_rate": 4.511041225214158e-07,
"loss": 0.561,
"step": 13698
},
{
"epoch": 0.87,
"grad_norm": 0.8737561106681824,
"learning_rate": 4.5067833592676136e-07,
"loss": 0.5404,
"step": 13699
},
{
"epoch": 0.87,
"grad_norm": 0.8905614614486694,
"learning_rate": 4.502527408882157e-07,
"loss": 0.5343,
"step": 13700
},
{
"epoch": 0.87,
"grad_norm": 0.8490473031997681,
"learning_rate": 4.498273374237e-07,
"loss": 0.5809,
"step": 13701
},
{
"epoch": 0.87,
"grad_norm": 0.9129199981689453,
"learning_rate": 4.494021255511266e-07,
"loss": 0.5969,
"step": 13702
},
{
"epoch": 0.87,
"grad_norm": 0.9153651595115662,
"learning_rate": 4.48977105288399e-07,
"loss": 0.571,
"step": 13703
},
{
"epoch": 0.87,
"grad_norm": 0.957604706287384,
"learning_rate": 4.485522766534145e-07,
"loss": 0.6299,
"step": 13704
},
{
"epoch": 0.87,
"grad_norm": 0.8316980600357056,
"learning_rate": 4.4812763966405825e-07,
"loss": 0.5236,
"step": 13705
},
{
"epoch": 0.87,
"grad_norm": 0.8910514712333679,
"learning_rate": 4.4770319433821487e-07,
"loss": 0.5742,
"step": 13706
},
{
"epoch": 0.87,
"grad_norm": 0.904670000076294,
"learning_rate": 4.472789406937522e-07,
"loss": 0.5931,
"step": 13707
},
{
"epoch": 0.87,
"grad_norm": 0.9204214215278625,
"learning_rate": 4.468548787485355e-07,
"loss": 0.5939,
"step": 13708
},
{
"epoch": 0.87,
"grad_norm": 0.9537574648857117,
"learning_rate": 4.4643100852042097e-07,
"loss": 0.5974,
"step": 13709
},
{
"epoch": 0.87,
"grad_norm": 0.9143358469009399,
"learning_rate": 4.4600733002725547e-07,
"loss": 0.5724,
"step": 13710
},
{
"epoch": 0.87,
"grad_norm": 0.8609566688537598,
"learning_rate": 4.4558384328687975e-07,
"loss": 0.5714,
"step": 13711
},
{
"epoch": 0.87,
"grad_norm": 0.8667165637016296,
"learning_rate": 4.451605483171251e-07,
"loss": 0.571,
"step": 13712
},
{
"epoch": 0.87,
"grad_norm": 0.8885953426361084,
"learning_rate": 4.4473744513581384e-07,
"loss": 0.5167,
"step": 13713
},
{
"epoch": 0.87,
"grad_norm": 0.8588200807571411,
"learning_rate": 4.443145337607624e-07,
"loss": 0.5433,
"step": 13714
},
{
"epoch": 0.87,
"grad_norm": 0.8709940910339355,
"learning_rate": 4.4389181420977814e-07,
"loss": 0.6293,
"step": 13715
},
{
"epoch": 0.87,
"grad_norm": 0.8453631401062012,
"learning_rate": 4.4346928650065957e-07,
"loss": 0.5408,
"step": 13716
},
{
"epoch": 0.87,
"grad_norm": 0.8846293091773987,
"learning_rate": 4.4304695065119807e-07,
"loss": 0.5588,
"step": 13717
},
{
"epoch": 0.87,
"grad_norm": 0.8700962066650391,
"learning_rate": 4.4262480667917774e-07,
"loss": 0.5479,
"step": 13718
},
{
"epoch": 0.87,
"grad_norm": 0.844928503036499,
"learning_rate": 4.422028546023721e-07,
"loss": 0.5638,
"step": 13719
},
{
"epoch": 0.87,
"grad_norm": 0.8891464471817017,
"learning_rate": 4.4178109443855033e-07,
"loss": 0.5845,
"step": 13720
},
{
"epoch": 0.87,
"grad_norm": 0.8719486594200134,
"learning_rate": 4.4135952620546876e-07,
"loss": 0.5724,
"step": 13721
},
{
"epoch": 0.87,
"grad_norm": 0.8617244958877563,
"learning_rate": 4.409381499208787e-07,
"loss": 0.5087,
"step": 13722
},
{
"epoch": 0.87,
"grad_norm": 0.8584579229354858,
"learning_rate": 4.405169656025238e-07,
"loss": 0.5701,
"step": 13723
},
{
"epoch": 0.87,
"grad_norm": 0.8903681635856628,
"learning_rate": 4.400959732681381e-07,
"loss": 0.5974,
"step": 13724
},
{
"epoch": 0.87,
"grad_norm": 0.878350019454956,
"learning_rate": 4.3967517293544814e-07,
"loss": 0.5478,
"step": 13725
},
{
"epoch": 0.87,
"grad_norm": 0.9635295271873474,
"learning_rate": 4.3925456462217244e-07,
"loss": 0.5976,
"step": 13726
},
{
"epoch": 0.87,
"grad_norm": 0.897746741771698,
"learning_rate": 4.3883414834602125e-07,
"loss": 0.5702,
"step": 13727
},
{
"epoch": 0.87,
"grad_norm": 0.8466120958328247,
"learning_rate": 4.384139241246982e-07,
"loss": 0.6266,
"step": 13728
},
{
"epoch": 0.87,
"grad_norm": 0.9046663045883179,
"learning_rate": 4.3799389197589525e-07,
"loss": 0.5742,
"step": 13729
},
{
"epoch": 0.87,
"grad_norm": 0.855974018573761,
"learning_rate": 4.375740519172994e-07,
"loss": 0.6135,
"step": 13730
},
{
"epoch": 0.87,
"grad_norm": 0.8562418818473816,
"learning_rate": 4.3715440396658816e-07,
"loss": 0.5726,
"step": 13731
},
{
"epoch": 0.87,
"grad_norm": 1.0470370054244995,
"learning_rate": 4.3673494814143234e-07,
"loss": 0.5896,
"step": 13732
},
{
"epoch": 0.87,
"grad_norm": 0.9556792378425598,
"learning_rate": 4.3631568445949403e-07,
"loss": 0.5409,
"step": 13733
},
{
"epoch": 0.87,
"grad_norm": 0.8872630000114441,
"learning_rate": 4.3589661293842624e-07,
"loss": 0.5565,
"step": 13734
},
{
"epoch": 0.87,
"grad_norm": 0.9071952104568481,
"learning_rate": 4.3547773359587377e-07,
"loss": 0.6007,
"step": 13735
},
{
"epoch": 0.87,
"grad_norm": 0.8532198071479797,
"learning_rate": 4.350590464494764e-07,
"loss": 0.521,
"step": 13736
},
{
"epoch": 0.87,
"grad_norm": 0.8936211466789246,
"learning_rate": 4.346405515168617e-07,
"loss": 0.5663,
"step": 13737
},
{
"epoch": 0.87,
"grad_norm": 0.9199041128158569,
"learning_rate": 4.342222488156511e-07,
"loss": 0.5873,
"step": 13738
},
{
"epoch": 0.87,
"grad_norm": 0.8879461884498596,
"learning_rate": 4.3380413836345893e-07,
"loss": 0.5838,
"step": 13739
},
{
"epoch": 0.87,
"grad_norm": 0.8619484305381775,
"learning_rate": 4.333862201778899e-07,
"loss": 0.5107,
"step": 13740
},
{
"epoch": 0.87,
"grad_norm": 0.9578720927238464,
"learning_rate": 4.329684942765411e-07,
"loss": 0.5738,
"step": 13741
},
{
"epoch": 0.87,
"grad_norm": 0.8812727928161621,
"learning_rate": 4.3255096067700176e-07,
"loss": 0.5691,
"step": 13742
},
{
"epoch": 0.87,
"grad_norm": 0.847726047039032,
"learning_rate": 4.321336193968523e-07,
"loss": 0.5662,
"step": 13743
},
{
"epoch": 0.87,
"grad_norm": 0.8409244418144226,
"learning_rate": 4.3171647045366525e-07,
"loss": 0.543,
"step": 13744
},
{
"epoch": 0.87,
"grad_norm": 0.966153621673584,
"learning_rate": 4.312995138650056e-07,
"loss": 0.5944,
"step": 13745
},
{
"epoch": 0.87,
"grad_norm": 0.8785676956176758,
"learning_rate": 4.3088274964843027e-07,
"loss": 0.5476,
"step": 13746
},
{
"epoch": 0.87,
"grad_norm": 0.9022130370140076,
"learning_rate": 4.3046617782148857e-07,
"loss": 0.5837,
"step": 13747
},
{
"epoch": 0.87,
"grad_norm": 0.9164488911628723,
"learning_rate": 4.300497984017182e-07,
"loss": 0.5348,
"step": 13748
},
{
"epoch": 0.87,
"grad_norm": 0.8544109463691711,
"learning_rate": 4.2963361140665405e-07,
"loss": 0.5099,
"step": 13749
},
{
"epoch": 0.87,
"grad_norm": 0.8812281489372253,
"learning_rate": 4.292176168538198e-07,
"loss": 0.5676,
"step": 13750
},
{
"epoch": 0.87,
"grad_norm": 0.9989331364631653,
"learning_rate": 4.2880181476073034e-07,
"loss": 0.5962,
"step": 13751
},
{
"epoch": 0.87,
"grad_norm": 0.8740145564079285,
"learning_rate": 4.283862051448945e-07,
"loss": 0.5772,
"step": 13752
},
{
"epoch": 0.87,
"grad_norm": 0.9067648649215698,
"learning_rate": 4.279707880238121e-07,
"loss": 0.57,
"step": 13753
},
{
"epoch": 0.87,
"grad_norm": 0.9112171530723572,
"learning_rate": 4.275555634149753e-07,
"loss": 0.605,
"step": 13754
},
{
"epoch": 0.87,
"grad_norm": 0.8743265867233276,
"learning_rate": 4.2714053133586785e-07,
"loss": 0.5284,
"step": 13755
},
{
"epoch": 0.87,
"grad_norm": 0.9052802324295044,
"learning_rate": 4.267256918039625e-07,
"loss": 0.5345,
"step": 13756
},
{
"epoch": 0.87,
"grad_norm": 0.840216875076294,
"learning_rate": 4.263110448367308e-07,
"loss": 0.5121,
"step": 13757
},
{
"epoch": 0.87,
"grad_norm": 0.8973818421363831,
"learning_rate": 4.2589659045163044e-07,
"loss": 0.6348,
"step": 13758
},
{
"epoch": 0.87,
"grad_norm": 0.8948500752449036,
"learning_rate": 4.254823286661125e-07,
"loss": 0.5814,
"step": 13759
},
{
"epoch": 0.87,
"grad_norm": 0.920590341091156,
"learning_rate": 4.250682594976191e-07,
"loss": 0.5692,
"step": 13760
},
{
"epoch": 0.87,
"grad_norm": 0.885006844997406,
"learning_rate": 4.2465438296358685e-07,
"loss": 0.5802,
"step": 13761
},
{
"epoch": 0.87,
"grad_norm": 0.847855269908905,
"learning_rate": 4.2424069908144236e-07,
"loss": 0.5368,
"step": 13762
},
{
"epoch": 0.87,
"grad_norm": 0.9912233948707581,
"learning_rate": 4.2382720786860453e-07,
"loss": 0.5901,
"step": 13763
},
{
"epoch": 0.87,
"grad_norm": 0.9090965986251831,
"learning_rate": 4.2341390934248273e-07,
"loss": 0.5469,
"step": 13764
},
{
"epoch": 0.87,
"grad_norm": 0.8936121463775635,
"learning_rate": 4.230008035204797e-07,
"loss": 0.5723,
"step": 13765
},
{
"epoch": 0.87,
"grad_norm": 0.8051524758338928,
"learning_rate": 4.225878904199926e-07,
"loss": 0.4853,
"step": 13766
},
{
"epoch": 0.87,
"grad_norm": 0.8978790640830994,
"learning_rate": 4.2217517005840423e-07,
"loss": 0.5568,
"step": 13767
},
{
"epoch": 0.87,
"grad_norm": 0.8630240559577942,
"learning_rate": 4.2176264245309517e-07,
"loss": 0.5686,
"step": 13768
},
{
"epoch": 0.87,
"grad_norm": 0.8735791444778442,
"learning_rate": 4.2135030762143424e-07,
"loss": 0.5625,
"step": 13769
},
{
"epoch": 0.87,
"grad_norm": 0.9239519238471985,
"learning_rate": 4.2093816558078373e-07,
"loss": 0.656,
"step": 13770
},
{
"epoch": 0.87,
"grad_norm": 0.8584021925926208,
"learning_rate": 4.205262163484991e-07,
"loss": 0.5384,
"step": 13771
},
{
"epoch": 0.87,
"grad_norm": 1.000178575515747,
"learning_rate": 4.2011445994192324e-07,
"loss": 0.5553,
"step": 13772
},
{
"epoch": 0.87,
"grad_norm": 1.005164384841919,
"learning_rate": 4.1970289637839556e-07,
"loss": 0.5724,
"step": 13773
},
{
"epoch": 0.87,
"grad_norm": 0.8568700551986694,
"learning_rate": 4.19291525675245e-07,
"loss": 0.567,
"step": 13774
},
{
"epoch": 0.87,
"grad_norm": 0.9740828275680542,
"learning_rate": 4.1888034784979326e-07,
"loss": 0.6062,
"step": 13775
},
{
"epoch": 0.87,
"grad_norm": 0.8327596187591553,
"learning_rate": 4.184693629193537e-07,
"loss": 0.627,
"step": 13776
},
{
"epoch": 0.87,
"grad_norm": 0.8938369154930115,
"learning_rate": 4.180585709012319e-07,
"loss": 0.577,
"step": 13777
},
{
"epoch": 0.87,
"grad_norm": 0.8749104738235474,
"learning_rate": 4.1764797181272296e-07,
"loss": 0.5819,
"step": 13778
},
{
"epoch": 0.87,
"grad_norm": 0.8669180870056152,
"learning_rate": 4.172375656711181e-07,
"loss": 0.5144,
"step": 13779
},
{
"epoch": 0.87,
"grad_norm": 0.9040730595588684,
"learning_rate": 4.1682735249369663e-07,
"loss": 0.6053,
"step": 13780
},
{
"epoch": 0.87,
"grad_norm": 0.9332876801490784,
"learning_rate": 4.1641733229773163e-07,
"loss": 0.5463,
"step": 13781
},
{
"epoch": 0.87,
"grad_norm": 0.8687313795089722,
"learning_rate": 4.1600750510048805e-07,
"loss": 0.5951,
"step": 13782
},
{
"epoch": 0.87,
"grad_norm": 0.9103281497955322,
"learning_rate": 4.1559787091922153e-07,
"loss": 0.5824,
"step": 13783
},
{
"epoch": 0.87,
"grad_norm": 0.840823233127594,
"learning_rate": 4.151884297711806e-07,
"loss": 0.5203,
"step": 13784
},
{
"epoch": 0.87,
"grad_norm": 0.890895664691925,
"learning_rate": 4.147791816736063e-07,
"loss": 0.5681,
"step": 13785
},
{
"epoch": 0.87,
"grad_norm": 0.8432772755622864,
"learning_rate": 4.143701266437283e-07,
"loss": 0.5618,
"step": 13786
},
{
"epoch": 0.87,
"grad_norm": 0.8869197368621826,
"learning_rate": 4.139612646987734e-07,
"loss": 0.5829,
"step": 13787
},
{
"epoch": 0.87,
"grad_norm": 0.9564074873924255,
"learning_rate": 4.135525958559555e-07,
"loss": 0.6305,
"step": 13788
},
{
"epoch": 0.87,
"grad_norm": 0.8258056044578552,
"learning_rate": 4.131441201324826e-07,
"loss": 0.5646,
"step": 13789
},
{
"epoch": 0.87,
"grad_norm": 0.9355778694152832,
"learning_rate": 4.1273583754555424e-07,
"loss": 0.5543,
"step": 13790
},
{
"epoch": 0.87,
"grad_norm": 0.895876407623291,
"learning_rate": 4.123277481123622e-07,
"loss": 0.5631,
"step": 13791
},
{
"epoch": 0.87,
"grad_norm": 0.8127149343490601,
"learning_rate": 4.1191985185008887e-07,
"loss": 0.5681,
"step": 13792
},
{
"epoch": 0.87,
"grad_norm": 0.8988841772079468,
"learning_rate": 4.1151214877591105e-07,
"loss": 0.5723,
"step": 13793
},
{
"epoch": 0.87,
"grad_norm": 0.9248142242431641,
"learning_rate": 4.1110463890699336e-07,
"loss": 0.5945,
"step": 13794
},
{
"epoch": 0.87,
"grad_norm": 0.9253085851669312,
"learning_rate": 4.1069732226049484e-07,
"loss": 0.6259,
"step": 13795
},
{
"epoch": 0.87,
"grad_norm": 0.8506118059158325,
"learning_rate": 4.102901988535685e-07,
"loss": 0.5496,
"step": 13796
},
{
"epoch": 0.87,
"grad_norm": 0.8817588686943054,
"learning_rate": 4.0988326870335494e-07,
"loss": 0.5534,
"step": 13797
},
{
"epoch": 0.87,
"grad_norm": 0.9294220805168152,
"learning_rate": 4.0947653182698887e-07,
"loss": 0.6071,
"step": 13798
},
{
"epoch": 0.87,
"grad_norm": 0.9263404011726379,
"learning_rate": 4.0906998824159715e-07,
"loss": 0.6115,
"step": 13799
},
{
"epoch": 0.87,
"grad_norm": 0.9097710251808167,
"learning_rate": 4.086636379642972e-07,
"loss": 0.5834,
"step": 13800
},
{
"epoch": 0.87,
"grad_norm": 0.9048157930374146,
"learning_rate": 4.0825748101220087e-07,
"loss": 0.5976,
"step": 13801
},
{
"epoch": 0.87,
"grad_norm": 0.8855105042457581,
"learning_rate": 4.078515174024067e-07,
"loss": 0.571,
"step": 13802
},
{
"epoch": 0.87,
"grad_norm": 0.8673086762428284,
"learning_rate": 4.074457471520099e-07,
"loss": 0.5968,
"step": 13803
},
{
"epoch": 0.87,
"grad_norm": 0.8577106595039368,
"learning_rate": 4.0704017027809797e-07,
"loss": 0.5826,
"step": 13804
},
{
"epoch": 0.87,
"grad_norm": 0.8799236416816711,
"learning_rate": 4.0663478679774604e-07,
"loss": 0.5497,
"step": 13805
},
{
"epoch": 0.87,
"grad_norm": 0.8574314117431641,
"learning_rate": 4.062295967280239e-07,
"loss": 0.5452,
"step": 13806
},
{
"epoch": 0.87,
"grad_norm": 0.8260728120803833,
"learning_rate": 4.058246000859939e-07,
"loss": 0.6122,
"step": 13807
},
{
"epoch": 0.87,
"grad_norm": 0.8879086375236511,
"learning_rate": 4.054197968887064e-07,
"loss": 0.5183,
"step": 13808
},
{
"epoch": 0.87,
"grad_norm": 0.8634669184684753,
"learning_rate": 4.0501518715320933e-07,
"loss": 0.5658,
"step": 13809
},
{
"epoch": 0.87,
"grad_norm": 0.8982515931129456,
"learning_rate": 4.046107708965369e-07,
"loss": 0.5977,
"step": 13810
},
{
"epoch": 0.88,
"grad_norm": 0.9243874549865723,
"learning_rate": 4.042065481357188e-07,
"loss": 0.5895,
"step": 13811
},
{
"epoch": 0.88,
"grad_norm": 0.9220935702323914,
"learning_rate": 4.038025188877753e-07,
"loss": 0.5842,
"step": 13812
},
{
"epoch": 0.88,
"grad_norm": 0.9362528920173645,
"learning_rate": 4.03398683169719e-07,
"loss": 0.5917,
"step": 13813
},
{
"epoch": 0.88,
"grad_norm": 0.9390722513198853,
"learning_rate": 4.029950409985539e-07,
"loss": 0.5915,
"step": 13814
},
{
"epoch": 0.88,
"grad_norm": 0.9555262327194214,
"learning_rate": 4.0259159239127656e-07,
"loss": 0.5891,
"step": 13815
},
{
"epoch": 0.88,
"grad_norm": 0.8341988325119019,
"learning_rate": 4.021883373648722e-07,
"loss": 0.5347,
"step": 13816
},
{
"epoch": 0.88,
"grad_norm": 0.8047258853912354,
"learning_rate": 4.017852759363239e-07,
"loss": 0.4463,
"step": 13817
},
{
"epoch": 0.88,
"grad_norm": 0.8843516707420349,
"learning_rate": 4.013824081226009e-07,
"loss": 0.5822,
"step": 13818
},
{
"epoch": 0.88,
"grad_norm": 0.889491856098175,
"learning_rate": 4.009797339406674e-07,
"loss": 0.5696,
"step": 13819
},
{
"epoch": 0.88,
"grad_norm": 0.866584062576294,
"learning_rate": 4.005772534074792e-07,
"loss": 0.5666,
"step": 13820
},
{
"epoch": 0.88,
"grad_norm": 0.9304389953613281,
"learning_rate": 4.001749665399807e-07,
"loss": 0.5963,
"step": 13821
},
{
"epoch": 0.88,
"grad_norm": 0.9130128026008606,
"learning_rate": 3.997728733551137e-07,
"loss": 0.5348,
"step": 13822
},
{
"epoch": 0.88,
"grad_norm": 0.8755511045455933,
"learning_rate": 3.993709738698093e-07,
"loss": 0.5714,
"step": 13823
},
{
"epoch": 0.88,
"grad_norm": 0.8576176762580872,
"learning_rate": 3.989692681009877e-07,
"loss": 0.5845,
"step": 13824
},
{
"epoch": 0.88,
"grad_norm": 0.9526224136352539,
"learning_rate": 3.985677560655643e-07,
"loss": 0.5841,
"step": 13825
},
{
"epoch": 0.88,
"grad_norm": 0.915798544883728,
"learning_rate": 3.9816643778044506e-07,
"loss": 0.6271,
"step": 13826
},
{
"epoch": 0.88,
"grad_norm": 0.8685756921768188,
"learning_rate": 3.977653132625292e-07,
"loss": 0.5363,
"step": 13827
},
{
"epoch": 0.88,
"grad_norm": 0.8983719348907471,
"learning_rate": 3.9736438252870655e-07,
"loss": 0.589,
"step": 13828
},
{
"epoch": 0.88,
"grad_norm": 0.9057663679122925,
"learning_rate": 3.969636455958564e-07,
"loss": 0.587,
"step": 13829
},
{
"epoch": 0.88,
"grad_norm": 0.886985719203949,
"learning_rate": 3.965631024808553e-07,
"loss": 0.6049,
"step": 13830
},
{
"epoch": 0.88,
"grad_norm": 0.8384401798248291,
"learning_rate": 3.961627532005691e-07,
"loss": 0.519,
"step": 13831
},
{
"epoch": 0.88,
"grad_norm": 0.851017951965332,
"learning_rate": 3.957625977718527e-07,
"loss": 0.5821,
"step": 13832
},
{
"epoch": 0.88,
"grad_norm": 0.8468850255012512,
"learning_rate": 3.953626362115559e-07,
"loss": 0.5651,
"step": 13833
},
{
"epoch": 0.88,
"grad_norm": 0.9317176938056946,
"learning_rate": 3.949628685365203e-07,
"loss": 0.5769,
"step": 13834
},
{
"epoch": 0.88,
"grad_norm": 0.888606607913971,
"learning_rate": 3.945632947635791e-07,
"loss": 0.5461,
"step": 13835
},
{
"epoch": 0.88,
"grad_norm": 0.8703384399414062,
"learning_rate": 3.941639149095566e-07,
"loss": 0.6008,
"step": 13836
},
{
"epoch": 0.88,
"grad_norm": 0.8924920558929443,
"learning_rate": 3.9376472899126884e-07,
"loss": 0.5616,
"step": 13837
},
{
"epoch": 0.88,
"grad_norm": 0.8976526856422424,
"learning_rate": 3.933657370255228e-07,
"loss": 0.603,
"step": 13838
},
{
"epoch": 0.88,
"grad_norm": 0.9381121397018433,
"learning_rate": 3.9296693902912244e-07,
"loss": 0.5989,
"step": 13839
},
{
"epoch": 0.88,
"grad_norm": 0.8531518578529358,
"learning_rate": 3.9256833501885693e-07,
"loss": 0.613,
"step": 13840
},
{
"epoch": 0.88,
"grad_norm": 0.8720222115516663,
"learning_rate": 3.9216992501151074e-07,
"loss": 0.5606,
"step": 13841
},
{
"epoch": 0.88,
"grad_norm": 0.8882898688316345,
"learning_rate": 3.917717090238593e-07,
"loss": 0.6199,
"step": 13842
},
{
"epoch": 0.88,
"grad_norm": 0.898091197013855,
"learning_rate": 3.913736870726703e-07,
"loss": 0.5583,
"step": 13843
},
{
"epoch": 0.88,
"grad_norm": 0.8894566893577576,
"learning_rate": 3.909758591747037e-07,
"loss": 0.6388,
"step": 13844
},
{
"epoch": 0.88,
"grad_norm": 0.8718437552452087,
"learning_rate": 3.905782253467094e-07,
"loss": 0.5293,
"step": 13845
},
{
"epoch": 0.88,
"grad_norm": 0.8743876218795776,
"learning_rate": 3.9018078560543015e-07,
"loss": 0.5883,
"step": 13846
},
{
"epoch": 0.88,
"grad_norm": 0.9666427373886108,
"learning_rate": 3.8978353996760365e-07,
"loss": 0.5466,
"step": 13847
},
{
"epoch": 0.88,
"grad_norm": 0.8270087838172913,
"learning_rate": 3.8938648844995374e-07,
"loss": 0.5458,
"step": 13848
},
{
"epoch": 0.88,
"grad_norm": 0.8940174579620361,
"learning_rate": 3.889896310691993e-07,
"loss": 0.5766,
"step": 13849
},
{
"epoch": 0.88,
"grad_norm": 0.9705901741981506,
"learning_rate": 3.885929678420508e-07,
"loss": 0.5894,
"step": 13850
},
{
"epoch": 0.88,
"grad_norm": 0.8427651524543762,
"learning_rate": 3.881964987852105e-07,
"loss": 0.5606,
"step": 13851
},
{
"epoch": 0.88,
"grad_norm": 0.9404253363609314,
"learning_rate": 3.878002239153739e-07,
"loss": 0.635,
"step": 13852
},
{
"epoch": 0.88,
"grad_norm": 0.8982987403869629,
"learning_rate": 3.874041432492237e-07,
"loss": 0.6041,
"step": 13853
},
{
"epoch": 0.88,
"grad_norm": 0.9181349873542786,
"learning_rate": 3.870082568034389e-07,
"loss": 0.5793,
"step": 13854
},
{
"epoch": 0.88,
"grad_norm": 0.9045166969299316,
"learning_rate": 3.866125645946894e-07,
"loss": 0.533,
"step": 13855
},
{
"epoch": 0.88,
"grad_norm": 0.8742471933364868,
"learning_rate": 3.862170666396359e-07,
"loss": 0.5753,
"step": 13856
},
{
"epoch": 0.88,
"grad_norm": 0.8175247311592102,
"learning_rate": 3.858217629549316e-07,
"loss": 0.5068,
"step": 13857
},
{
"epoch": 0.88,
"grad_norm": 0.9291229248046875,
"learning_rate": 3.8542665355722154e-07,
"loss": 0.5553,
"step": 13858
},
{
"epoch": 0.88,
"grad_norm": 0.8577315807342529,
"learning_rate": 3.8503173846314137e-07,
"loss": 0.489,
"step": 13859
},
{
"epoch": 0.88,
"grad_norm": 0.9240617752075195,
"learning_rate": 3.846370176893205e-07,
"loss": 0.5803,
"step": 13860
},
{
"epoch": 0.88,
"grad_norm": 0.9180030226707458,
"learning_rate": 3.8424249125238065e-07,
"loss": 0.5931,
"step": 13861
},
{
"epoch": 0.88,
"grad_norm": 0.8835217356681824,
"learning_rate": 3.838481591689308e-07,
"loss": 0.5581,
"step": 13862
},
{
"epoch": 0.88,
"grad_norm": 0.9661378860473633,
"learning_rate": 3.834540214555771e-07,
"loss": 0.628,
"step": 13863
},
{
"epoch": 0.88,
"grad_norm": 0.8079046607017517,
"learning_rate": 3.830600781289151e-07,
"loss": 0.5342,
"step": 13864
},
{
"epoch": 0.88,
"grad_norm": 0.8845729231834412,
"learning_rate": 3.826663292055316e-07,
"loss": 0.5631,
"step": 13865
},
{
"epoch": 0.88,
"grad_norm": 0.9163376092910767,
"learning_rate": 3.822727747020072e-07,
"loss": 0.6073,
"step": 13866
},
{
"epoch": 0.88,
"grad_norm": 0.9018417596817017,
"learning_rate": 3.818794146349114e-07,
"loss": 0.5893,
"step": 13867
},
{
"epoch": 0.88,
"grad_norm": 0.9381417036056519,
"learning_rate": 3.8148624902080764e-07,
"loss": 0.5507,
"step": 13868
},
{
"epoch": 0.88,
"grad_norm": 0.8565617203712463,
"learning_rate": 3.8109327787625273e-07,
"loss": 0.581,
"step": 13869
},
{
"epoch": 0.88,
"grad_norm": 0.8831982016563416,
"learning_rate": 3.807005012177911e-07,
"loss": 0.5437,
"step": 13870
},
{
"epoch": 0.88,
"grad_norm": 0.9772710204124451,
"learning_rate": 3.803079190619624e-07,
"loss": 0.6289,
"step": 13871
},
{
"epoch": 0.88,
"grad_norm": 0.892636239528656,
"learning_rate": 3.7991553142529616e-07,
"loss": 0.5559,
"step": 13872
},
{
"epoch": 0.88,
"grad_norm": 0.8699362277984619,
"learning_rate": 3.7952333832431466e-07,
"loss": 0.5419,
"step": 13873
},
{
"epoch": 0.88,
"grad_norm": 0.8843465447425842,
"learning_rate": 3.7913133977553306e-07,
"loss": 0.5928,
"step": 13874
},
{
"epoch": 0.88,
"grad_norm": 0.9290990233421326,
"learning_rate": 3.7873953579545486e-07,
"loss": 0.5642,
"step": 13875
},
{
"epoch": 0.88,
"grad_norm": 0.9399954080581665,
"learning_rate": 3.783479264005779e-07,
"loss": 0.5733,
"step": 13876
},
{
"epoch": 0.88,
"grad_norm": 0.9060240387916565,
"learning_rate": 3.779565116073941e-07,
"loss": 0.5444,
"step": 13877
},
{
"epoch": 0.88,
"grad_norm": 0.8936532735824585,
"learning_rate": 3.775652914323813e-07,
"loss": 0.6461,
"step": 13878
},
{
"epoch": 0.88,
"grad_norm": 0.9363529086112976,
"learning_rate": 3.771742658920141e-07,
"loss": 0.586,
"step": 13879
},
{
"epoch": 0.88,
"grad_norm": 0.8696059584617615,
"learning_rate": 3.767834350027572e-07,
"loss": 0.5465,
"step": 13880
},
{
"epoch": 0.88,
"grad_norm": 0.8789991736412048,
"learning_rate": 3.7639279878106616e-07,
"loss": 0.5846,
"step": 13881
},
{
"epoch": 0.88,
"grad_norm": 0.9171331524848938,
"learning_rate": 3.7600235724339127e-07,
"loss": 0.5901,
"step": 13882
},
{
"epoch": 0.88,
"grad_norm": 0.8733245134353638,
"learning_rate": 3.756121104061705e-07,
"loss": 0.54,
"step": 13883
},
{
"epoch": 0.88,
"grad_norm": 0.8445425033569336,
"learning_rate": 3.752220582858368e-07,
"loss": 0.5577,
"step": 13884
},
{
"epoch": 0.88,
"grad_norm": 0.8852535486221313,
"learning_rate": 3.748322008988137e-07,
"loss": 0.6001,
"step": 13885
},
{
"epoch": 0.88,
"grad_norm": 0.9022545218467712,
"learning_rate": 3.744425382615169e-07,
"loss": 0.6012,
"step": 13886
},
{
"epoch": 0.88,
"grad_norm": 0.8933830261230469,
"learning_rate": 3.7405307039035387e-07,
"loss": 0.6361,
"step": 13887
},
{
"epoch": 0.88,
"grad_norm": 0.8915839195251465,
"learning_rate": 3.7366379730172376e-07,
"loss": 0.6196,
"step": 13888
},
{
"epoch": 0.88,
"grad_norm": 0.8615158200263977,
"learning_rate": 3.732747190120162e-07,
"loss": 0.6029,
"step": 13889
},
{
"epoch": 0.88,
"grad_norm": 0.8902248740196228,
"learning_rate": 3.728858355376164e-07,
"loss": 0.6026,
"step": 13890
},
{
"epoch": 0.88,
"grad_norm": 0.8872123956680298,
"learning_rate": 3.724971468948968e-07,
"loss": 0.5928,
"step": 13891
},
{
"epoch": 0.88,
"grad_norm": 0.8912094831466675,
"learning_rate": 3.721086531002244e-07,
"loss": 0.588,
"step": 13892
},
{
"epoch": 0.88,
"grad_norm": 0.9006378054618835,
"learning_rate": 3.7172035416995765e-07,
"loss": 0.5815,
"step": 13893
},
{
"epoch": 0.88,
"grad_norm": 0.877053439617157,
"learning_rate": 3.7133225012044585e-07,
"loss": 0.5095,
"step": 13894
},
{
"epoch": 0.88,
"grad_norm": 0.9015281796455383,
"learning_rate": 3.709443409680308e-07,
"loss": 0.5498,
"step": 13895
},
{
"epoch": 0.88,
"grad_norm": 0.8962835073471069,
"learning_rate": 3.7055662672904723e-07,
"loss": 0.569,
"step": 13896
},
{
"epoch": 0.88,
"grad_norm": 0.8946380615234375,
"learning_rate": 3.7016910741981825e-07,
"loss": 0.5726,
"step": 13897
},
{
"epoch": 0.88,
"grad_norm": 0.8510831594467163,
"learning_rate": 3.6978178305666357e-07,
"loss": 0.496,
"step": 13898
},
{
"epoch": 0.88,
"grad_norm": 0.9237155914306641,
"learning_rate": 3.693946536558896e-07,
"loss": 0.5727,
"step": 13899
},
{
"epoch": 0.88,
"grad_norm": 0.8890257477760315,
"learning_rate": 3.6900771923379817e-07,
"loss": 0.5797,
"step": 13900
},
{
"epoch": 0.88,
"grad_norm": 0.931348979473114,
"learning_rate": 3.6862097980668255e-07,
"loss": 0.6333,
"step": 13901
},
{
"epoch": 0.88,
"grad_norm": 0.8788846135139465,
"learning_rate": 3.68234435390824e-07,
"loss": 0.6,
"step": 13902
},
{
"epoch": 0.88,
"grad_norm": 0.9328796863555908,
"learning_rate": 3.6784808600250186e-07,
"loss": 0.5973,
"step": 13903
},
{
"epoch": 0.88,
"grad_norm": 0.9625527262687683,
"learning_rate": 3.674619316579836e-07,
"loss": 0.5795,
"step": 13904
},
{
"epoch": 0.88,
"grad_norm": 0.9201703071594238,
"learning_rate": 3.670759723735273e-07,
"loss": 0.5592,
"step": 13905
},
{
"epoch": 0.88,
"grad_norm": 0.8659148812294006,
"learning_rate": 3.666902081653845e-07,
"loss": 0.5503,
"step": 13906
},
{
"epoch": 0.88,
"grad_norm": 0.9211107492446899,
"learning_rate": 3.663046390497993e-07,
"loss": 0.5797,
"step": 13907
},
{
"epoch": 0.88,
"grad_norm": 0.9264574646949768,
"learning_rate": 3.659192650430066e-07,
"loss": 0.5992,
"step": 13908
},
{
"epoch": 0.88,
"grad_norm": 0.8879082202911377,
"learning_rate": 3.655340861612333e-07,
"loss": 0.5596,
"step": 13909
},
{
"epoch": 0.88,
"grad_norm": 0.8419327139854431,
"learning_rate": 3.6514910242069547e-07,
"loss": 0.5711,
"step": 13910
},
{
"epoch": 0.88,
"grad_norm": 0.8223779797554016,
"learning_rate": 3.647643138376067e-07,
"loss": 0.5581,
"step": 13911
},
{
"epoch": 0.88,
"grad_norm": 0.9604656100273132,
"learning_rate": 3.6437972042816904e-07,
"loss": 0.6241,
"step": 13912
},
{
"epoch": 0.88,
"grad_norm": 0.8464024662971497,
"learning_rate": 3.6399532220857403e-07,
"loss": 0.5518,
"step": 13913
},
{
"epoch": 0.88,
"grad_norm": 0.8726653456687927,
"learning_rate": 3.6361111919500815e-07,
"loss": 0.5917,
"step": 13914
},
{
"epoch": 0.88,
"grad_norm": 0.888209879398346,
"learning_rate": 3.6322711140364953e-07,
"loss": 0.6297,
"step": 13915
},
{
"epoch": 0.88,
"grad_norm": 0.8308035731315613,
"learning_rate": 3.628432988506675e-07,
"loss": 0.5388,
"step": 13916
},
{
"epoch": 0.88,
"grad_norm": 0.9365667700767517,
"learning_rate": 3.6245968155222243e-07,
"loss": 0.5515,
"step": 13917
},
{
"epoch": 0.88,
"grad_norm": 0.8524565100669861,
"learning_rate": 3.6207625952446756e-07,
"loss": 0.5938,
"step": 13918
},
{
"epoch": 0.88,
"grad_norm": 0.8615753650665283,
"learning_rate": 3.616930327835466e-07,
"loss": 0.5269,
"step": 13919
},
{
"epoch": 0.88,
"grad_norm": 0.894736111164093,
"learning_rate": 3.613100013455972e-07,
"loss": 0.6134,
"step": 13920
},
{
"epoch": 0.88,
"grad_norm": 0.9682538509368896,
"learning_rate": 3.609271652267465e-07,
"loss": 0.5901,
"step": 13921
},
{
"epoch": 0.88,
"grad_norm": 0.9055116772651672,
"learning_rate": 3.6054452444311493e-07,
"loss": 0.6037,
"step": 13922
},
{
"epoch": 0.88,
"grad_norm": 0.8035820126533508,
"learning_rate": 3.601620790108135e-07,
"loss": 0.5568,
"step": 13923
},
{
"epoch": 0.88,
"grad_norm": 0.930554211139679,
"learning_rate": 3.597798289459464e-07,
"loss": 0.6585,
"step": 13924
},
{
"epoch": 0.88,
"grad_norm": 0.9319306015968323,
"learning_rate": 3.593977742646088e-07,
"loss": 0.5412,
"step": 13925
},
{
"epoch": 0.88,
"grad_norm": 0.8614120483398438,
"learning_rate": 3.5901591498288755e-07,
"loss": 0.5399,
"step": 13926
},
{
"epoch": 0.88,
"grad_norm": 0.897907555103302,
"learning_rate": 3.5863425111686e-07,
"loss": 0.5419,
"step": 13927
},
{
"epoch": 0.88,
"grad_norm": 0.9247115254402161,
"learning_rate": 3.5825278268259987e-07,
"loss": 0.6008,
"step": 13928
},
{
"epoch": 0.88,
"grad_norm": 0.886035680770874,
"learning_rate": 3.5787150969616657e-07,
"loss": 0.559,
"step": 13929
},
{
"epoch": 0.88,
"grad_norm": 0.9609770774841309,
"learning_rate": 3.57490432173615e-07,
"loss": 0.5855,
"step": 13930
},
{
"epoch": 0.88,
"grad_norm": 0.8835691809654236,
"learning_rate": 3.5710955013099233e-07,
"loss": 0.5454,
"step": 13931
},
{
"epoch": 0.88,
"grad_norm": 0.9028952717781067,
"learning_rate": 3.5672886358433356e-07,
"loss": 0.57,
"step": 13932
},
{
"epoch": 0.88,
"grad_norm": 0.8876438140869141,
"learning_rate": 3.5634837254967023e-07,
"loss": 0.5503,
"step": 13933
},
{
"epoch": 0.88,
"grad_norm": 0.8692540526390076,
"learning_rate": 3.559680770430235e-07,
"loss": 0.5504,
"step": 13934
},
{
"epoch": 0.88,
"grad_norm": 0.8966943025588989,
"learning_rate": 3.555879770804049e-07,
"loss": 0.6109,
"step": 13935
},
{
"epoch": 0.88,
"grad_norm": 0.8818347454071045,
"learning_rate": 3.5520807267782007e-07,
"loss": 0.5318,
"step": 13936
},
{
"epoch": 0.88,
"grad_norm": 0.9468558430671692,
"learning_rate": 3.548283638512651e-07,
"loss": 0.6075,
"step": 13937
},
{
"epoch": 0.88,
"grad_norm": 0.8646183013916016,
"learning_rate": 3.544488506167282e-07,
"loss": 0.5779,
"step": 13938
},
{
"epoch": 0.88,
"grad_norm": 0.8838092088699341,
"learning_rate": 3.5406953299019056e-07,
"loss": 0.5319,
"step": 13939
},
{
"epoch": 0.88,
"grad_norm": 0.9578419327735901,
"learning_rate": 3.5369041098762103e-07,
"loss": 0.5811,
"step": 13940
},
{
"epoch": 0.88,
"grad_norm": 0.9535823464393616,
"learning_rate": 3.5331148462498635e-07,
"loss": 0.5944,
"step": 13941
},
{
"epoch": 0.88,
"grad_norm": 0.8988412618637085,
"learning_rate": 3.529327539182403e-07,
"loss": 0.5615,
"step": 13942
},
{
"epoch": 0.88,
"grad_norm": 0.8499272465705872,
"learning_rate": 3.5255421888332976e-07,
"loss": 0.5468,
"step": 13943
},
{
"epoch": 0.88,
"grad_norm": 0.8369562029838562,
"learning_rate": 3.5217587953619404e-07,
"loss": 0.5457,
"step": 13944
},
{
"epoch": 0.88,
"grad_norm": 0.8756216168403625,
"learning_rate": 3.517977358927632e-07,
"loss": 0.5575,
"step": 13945
},
{
"epoch": 0.88,
"grad_norm": 0.8889182209968567,
"learning_rate": 3.514197879689596e-07,
"loss": 0.5777,
"step": 13946
},
{
"epoch": 0.88,
"grad_norm": 0.9014686942100525,
"learning_rate": 3.5104203578069817e-07,
"loss": 0.5799,
"step": 13947
},
{
"epoch": 0.88,
"grad_norm": 0.8833813071250916,
"learning_rate": 3.506644793438835e-07,
"loss": 0.5552,
"step": 13948
},
{
"epoch": 0.88,
"grad_norm": 0.9554223418235779,
"learning_rate": 3.502871186744128e-07,
"loss": 0.5921,
"step": 13949
},
{
"epoch": 0.88,
"grad_norm": 0.879019021987915,
"learning_rate": 3.499099537881784e-07,
"loss": 0.6137,
"step": 13950
},
{
"epoch": 0.88,
"grad_norm": 0.9680423140525818,
"learning_rate": 3.495329847010581e-07,
"loss": 0.5773,
"step": 13951
},
{
"epoch": 0.88,
"grad_norm": 0.8740622997283936,
"learning_rate": 3.4915621142892595e-07,
"loss": 0.5879,
"step": 13952
},
{
"epoch": 0.88,
"grad_norm": 0.9262283444404602,
"learning_rate": 3.48779633987647e-07,
"loss": 0.5917,
"step": 13953
},
{
"epoch": 0.88,
"grad_norm": 0.8888399004936218,
"learning_rate": 3.4840325239307693e-07,
"loss": 0.6129,
"step": 13954
},
{
"epoch": 0.88,
"grad_norm": 0.855827808380127,
"learning_rate": 3.4802706666106525e-07,
"loss": 0.5782,
"step": 13955
},
{
"epoch": 0.88,
"grad_norm": 0.9697046279907227,
"learning_rate": 3.476510768074498e-07,
"loss": 0.5647,
"step": 13956
},
{
"epoch": 0.88,
"grad_norm": 0.8779304623603821,
"learning_rate": 3.4727528284806247e-07,
"loss": 0.5947,
"step": 13957
},
{
"epoch": 0.88,
"grad_norm": 0.8670563697814941,
"learning_rate": 3.468996847987288e-07,
"loss": 0.5335,
"step": 13958
},
{
"epoch": 0.88,
"grad_norm": 0.9170581102371216,
"learning_rate": 3.4652428267526184e-07,
"loss": 0.6615,
"step": 13959
},
{
"epoch": 0.88,
"grad_norm": 0.8803734183311462,
"learning_rate": 3.4614907649346884e-07,
"loss": 0.5578,
"step": 13960
},
{
"epoch": 0.88,
"grad_norm": 1.0183887481689453,
"learning_rate": 3.4577406626914947e-07,
"loss": 0.6096,
"step": 13961
},
{
"epoch": 0.88,
"grad_norm": 0.911680281162262,
"learning_rate": 3.453992520180921e-07,
"loss": 0.5951,
"step": 13962
},
{
"epoch": 0.88,
"grad_norm": 0.8374574780464172,
"learning_rate": 3.4502463375608143e-07,
"loss": 0.5592,
"step": 13963
},
{
"epoch": 0.88,
"grad_norm": 0.862108588218689,
"learning_rate": 3.446502114988892e-07,
"loss": 0.5704,
"step": 13964
},
{
"epoch": 0.88,
"grad_norm": 0.9024475812911987,
"learning_rate": 3.442759852622812e-07,
"loss": 0.6107,
"step": 13965
},
{
"epoch": 0.88,
"grad_norm": 0.8448460102081299,
"learning_rate": 3.4390195506201594e-07,
"loss": 0.5212,
"step": 13966
},
{
"epoch": 0.88,
"grad_norm": 0.8650651574134827,
"learning_rate": 3.435281209138419e-07,
"loss": 0.5699,
"step": 13967
},
{
"epoch": 0.88,
"grad_norm": 0.8967364430427551,
"learning_rate": 3.4315448283349985e-07,
"loss": 0.58,
"step": 13968
},
{
"epoch": 0.89,
"grad_norm": 0.8695257902145386,
"learning_rate": 3.4278104083672383e-07,
"loss": 0.5894,
"step": 13969
},
{
"epoch": 0.89,
"grad_norm": 0.85512775182724,
"learning_rate": 3.424077949392346e-07,
"loss": 0.5647,
"step": 13970
},
{
"epoch": 0.89,
"grad_norm": 0.908794641494751,
"learning_rate": 3.4203474515675293e-07,
"loss": 0.5832,
"step": 13971
},
{
"epoch": 0.89,
"grad_norm": 0.859935462474823,
"learning_rate": 3.4166189150498297e-07,
"loss": 0.5769,
"step": 13972
},
{
"epoch": 0.89,
"grad_norm": 0.892041802406311,
"learning_rate": 3.4128923399962543e-07,
"loss": 0.5362,
"step": 13973
},
{
"epoch": 0.89,
"grad_norm": 0.9205070734024048,
"learning_rate": 3.4091677265637224e-07,
"loss": 0.6354,
"step": 13974
},
{
"epoch": 0.89,
"grad_norm": 0.8668642640113831,
"learning_rate": 3.405445074909053e-07,
"loss": 0.5666,
"step": 13975
},
{
"epoch": 0.89,
"grad_norm": 0.8559014797210693,
"learning_rate": 3.401724385189009e-07,
"loss": 0.528,
"step": 13976
},
{
"epoch": 0.89,
"grad_norm": 0.8495075702667236,
"learning_rate": 3.398005657560249e-07,
"loss": 0.568,
"step": 13977
},
{
"epoch": 0.89,
"grad_norm": 0.8912281394004822,
"learning_rate": 3.394288892179348e-07,
"loss": 0.5637,
"step": 13978
},
{
"epoch": 0.89,
"grad_norm": 0.8422101140022278,
"learning_rate": 3.390574089202814e-07,
"loss": 0.5824,
"step": 13979
},
{
"epoch": 0.89,
"grad_norm": 0.8872683048248291,
"learning_rate": 3.3868612487870657e-07,
"loss": 0.6023,
"step": 13980
},
{
"epoch": 0.89,
"grad_norm": 0.871668815612793,
"learning_rate": 3.3831503710884286e-07,
"loss": 0.5728,
"step": 13981
},
{
"epoch": 0.89,
"grad_norm": 0.9398965835571289,
"learning_rate": 3.379441456263166e-07,
"loss": 0.5988,
"step": 13982
},
{
"epoch": 0.89,
"grad_norm": 0.804894208908081,
"learning_rate": 3.375734504467437e-07,
"loss": 0.545,
"step": 13983
},
{
"epoch": 0.89,
"grad_norm": 0.8272179961204529,
"learning_rate": 3.372029515857339e-07,
"loss": 0.5751,
"step": 13984
},
{
"epoch": 0.89,
"grad_norm": 0.8695221543312073,
"learning_rate": 3.368326490588875e-07,
"loss": 0.5484,
"step": 13985
},
{
"epoch": 0.89,
"grad_norm": 0.865994930267334,
"learning_rate": 3.364625428817958e-07,
"loss": 0.5282,
"step": 13986
},
{
"epoch": 0.89,
"grad_norm": 0.9086118340492249,
"learning_rate": 3.360926330700431e-07,
"loss": 0.573,
"step": 13987
},
{
"epoch": 0.89,
"grad_norm": 0.8299586772918701,
"learning_rate": 3.3572291963920536e-07,
"loss": 0.5297,
"step": 13988
},
{
"epoch": 0.89,
"grad_norm": 0.8901399970054626,
"learning_rate": 3.353534026048494e-07,
"loss": 0.6119,
"step": 13989
},
{
"epoch": 0.89,
"grad_norm": 0.8746377229690552,
"learning_rate": 3.3498408198253453e-07,
"loss": 0.5493,
"step": 13990
},
{
"epoch": 0.89,
"grad_norm": 0.8297358751296997,
"learning_rate": 3.3461495778781104e-07,
"loss": 0.5338,
"step": 13991
},
{
"epoch": 0.89,
"grad_norm": 0.826378345489502,
"learning_rate": 3.342460300362227e-07,
"loss": 0.5438,
"step": 13992
},
{
"epoch": 0.89,
"grad_norm": 0.8547459244728088,
"learning_rate": 3.3387729874330367e-07,
"loss": 0.5513,
"step": 13993
},
{
"epoch": 0.89,
"grad_norm": 0.8909960985183716,
"learning_rate": 3.335087639245782e-07,
"loss": 0.6038,
"step": 13994
},
{
"epoch": 0.89,
"grad_norm": 0.992056131362915,
"learning_rate": 3.331404255955656e-07,
"loss": 0.5645,
"step": 13995
},
{
"epoch": 0.89,
"grad_norm": 0.8902263641357422,
"learning_rate": 3.327722837717745e-07,
"loss": 0.5573,
"step": 13996
},
{
"epoch": 0.89,
"grad_norm": 0.896858274936676,
"learning_rate": 3.32404338468707e-07,
"loss": 0.5627,
"step": 13997
},
{
"epoch": 0.89,
"grad_norm": 0.9204726219177246,
"learning_rate": 3.320365897018546e-07,
"loss": 0.5782,
"step": 13998
},
{
"epoch": 0.89,
"grad_norm": 0.9295701384544373,
"learning_rate": 3.316690374867043e-07,
"loss": 0.5769,
"step": 13999
},
{
"epoch": 0.89,
"grad_norm": 0.8957133293151855,
"learning_rate": 3.313016818387288e-07,
"loss": 0.5495,
"step": 14000
},
{
"epoch": 0.89,
"grad_norm": 0.850740909576416,
"learning_rate": 3.309345227734001e-07,
"loss": 0.5891,
"step": 14001
},
{
"epoch": 0.89,
"grad_norm": 0.8553286790847778,
"learning_rate": 3.305675603061753e-07,
"loss": 0.5852,
"step": 14002
},
{
"epoch": 0.89,
"grad_norm": 0.8360522389411926,
"learning_rate": 3.3020079445250655e-07,
"loss": 0.6143,
"step": 14003
},
{
"epoch": 0.89,
"grad_norm": 0.8745806813240051,
"learning_rate": 3.2983422522783747e-07,
"loss": 0.5673,
"step": 14004
},
{
"epoch": 0.89,
"grad_norm": 0.8642773032188416,
"learning_rate": 3.2946785264760305e-07,
"loss": 0.5315,
"step": 14005
},
{
"epoch": 0.89,
"grad_norm": 0.9193217158317566,
"learning_rate": 3.291016767272298e-07,
"loss": 0.6075,
"step": 14006
},
{
"epoch": 0.89,
"grad_norm": 0.8983094692230225,
"learning_rate": 3.287356974821365e-07,
"loss": 0.5878,
"step": 14007
},
{
"epoch": 0.89,
"grad_norm": 0.8727191090583801,
"learning_rate": 3.28369914927732e-07,
"loss": 0.5405,
"step": 14008
},
{
"epoch": 0.89,
"grad_norm": 0.8366736769676208,
"learning_rate": 3.2800432907941935e-07,
"loss": 0.5571,
"step": 14009
},
{
"epoch": 0.89,
"grad_norm": 0.9027994275093079,
"learning_rate": 3.276389399525914e-07,
"loss": 0.5512,
"step": 14010
},
{
"epoch": 0.89,
"grad_norm": 0.9058107137680054,
"learning_rate": 3.272737475626342e-07,
"loss": 0.5707,
"step": 14011
},
{
"epoch": 0.89,
"grad_norm": 0.9187793731689453,
"learning_rate": 3.269087519249242e-07,
"loss": 0.5556,
"step": 14012
},
{
"epoch": 0.89,
"grad_norm": 0.8657212257385254,
"learning_rate": 3.2654395305482924e-07,
"loss": 0.5968,
"step": 14013
},
{
"epoch": 0.89,
"grad_norm": 0.8940473198890686,
"learning_rate": 3.2617935096771137e-07,
"loss": 0.5712,
"step": 14014
},
{
"epoch": 0.89,
"grad_norm": 0.876758873462677,
"learning_rate": 3.258149456789228e-07,
"loss": 0.5664,
"step": 14015
},
{
"epoch": 0.89,
"grad_norm": 0.9137895703315735,
"learning_rate": 3.2545073720380573e-07,
"loss": 0.5223,
"step": 14016
},
{
"epoch": 0.89,
"grad_norm": 0.9135635495185852,
"learning_rate": 3.2508672555769617e-07,
"loss": 0.5516,
"step": 14017
},
{
"epoch": 0.89,
"grad_norm": 1.005510926246643,
"learning_rate": 3.2472291075592246e-07,
"loss": 0.6037,
"step": 14018
},
{
"epoch": 0.89,
"grad_norm": 0.8511359691619873,
"learning_rate": 3.243592928138023e-07,
"loss": 0.542,
"step": 14019
},
{
"epoch": 0.89,
"grad_norm": 0.8994077444076538,
"learning_rate": 3.2399587174664794e-07,
"loss": 0.5644,
"step": 14020
},
{
"epoch": 0.89,
"grad_norm": 0.8860614895820618,
"learning_rate": 3.236326475697593e-07,
"loss": 0.5672,
"step": 14021
},
{
"epoch": 0.89,
"grad_norm": 0.9307529926300049,
"learning_rate": 3.232696202984326e-07,
"loss": 0.5568,
"step": 14022
},
{
"epoch": 0.89,
"grad_norm": 0.7981402277946472,
"learning_rate": 3.2290678994795377e-07,
"loss": 0.537,
"step": 14023
},
{
"epoch": 0.89,
"grad_norm": 0.8489423990249634,
"learning_rate": 3.2254415653359906e-07,
"loss": 0.5791,
"step": 14024
},
{
"epoch": 0.89,
"grad_norm": 0.9137423634529114,
"learning_rate": 3.2218172007063787e-07,
"loss": 0.6229,
"step": 14025
},
{
"epoch": 0.89,
"grad_norm": 0.8917653560638428,
"learning_rate": 3.218194805743319e-07,
"loss": 0.5276,
"step": 14026
},
{
"epoch": 0.89,
"grad_norm": 0.8989799618721008,
"learning_rate": 3.2145743805993334e-07,
"loss": 0.5448,
"step": 14027
},
{
"epoch": 0.89,
"grad_norm": 0.8666809797286987,
"learning_rate": 3.210955925426873e-07,
"loss": 0.5514,
"step": 14028
},
{
"epoch": 0.89,
"grad_norm": 0.9025545120239258,
"learning_rate": 3.2073394403782823e-07,
"loss": 0.6176,
"step": 14029
},
{
"epoch": 0.89,
"grad_norm": 0.8828505873680115,
"learning_rate": 3.2037249256058445e-07,
"loss": 0.6386,
"step": 14030
},
{
"epoch": 0.89,
"grad_norm": 0.9091066122055054,
"learning_rate": 3.2001123812617663e-07,
"loss": 0.5939,
"step": 14031
},
{
"epoch": 0.89,
"grad_norm": 0.9628225564956665,
"learning_rate": 3.196501807498148e-07,
"loss": 0.5717,
"step": 14032
},
{
"epoch": 0.89,
"grad_norm": 0.8795494437217712,
"learning_rate": 3.192893204467018e-07,
"loss": 0.5421,
"step": 14033
},
{
"epoch": 0.89,
"grad_norm": 0.9152184128761292,
"learning_rate": 3.189286572320327e-07,
"loss": 0.596,
"step": 14034
},
{
"epoch": 0.89,
"grad_norm": 0.9376192092895508,
"learning_rate": 3.185681911209937e-07,
"loss": 0.5486,
"step": 14035
},
{
"epoch": 0.89,
"grad_norm": 0.9213606119155884,
"learning_rate": 3.1820792212876316e-07,
"loss": 0.5384,
"step": 14036
},
{
"epoch": 0.89,
"grad_norm": 0.9375748038291931,
"learning_rate": 3.178478502705101e-07,
"loss": 0.5772,
"step": 14037
},
{
"epoch": 0.89,
"grad_norm": 0.900903582572937,
"learning_rate": 3.174879755613952e-07,
"loss": 0.5485,
"step": 14038
},
{
"epoch": 0.89,
"grad_norm": 0.9398730993270874,
"learning_rate": 3.1712829801657294e-07,
"loss": 0.5505,
"step": 14039
},
{
"epoch": 0.89,
"grad_norm": 0.9179508686065674,
"learning_rate": 3.167688176511874e-07,
"loss": 0.6111,
"step": 14040
},
{
"epoch": 0.89,
"grad_norm": 0.916954755783081,
"learning_rate": 3.1640953448037527e-07,
"loss": 0.605,
"step": 14041
},
{
"epoch": 0.89,
"grad_norm": 0.8982189893722534,
"learning_rate": 3.1605044851926504e-07,
"loss": 0.5572,
"step": 14042
},
{
"epoch": 0.89,
"grad_norm": 0.8893555998802185,
"learning_rate": 3.1569155978297463e-07,
"loss": 0.5713,
"step": 14043
},
{
"epoch": 0.89,
"grad_norm": 0.9181665182113647,
"learning_rate": 3.1533286828661915e-07,
"loss": 0.5584,
"step": 14044
},
{
"epoch": 0.89,
"grad_norm": 0.8733421564102173,
"learning_rate": 3.1497437404529875e-07,
"loss": 0.6073,
"step": 14045
},
{
"epoch": 0.89,
"grad_norm": 0.8900958895683289,
"learning_rate": 3.1461607707410914e-07,
"loss": 0.5909,
"step": 14046
},
{
"epoch": 0.89,
"grad_norm": 0.9942273497581482,
"learning_rate": 3.142579773881377e-07,
"loss": 0.5821,
"step": 14047
},
{
"epoch": 0.89,
"grad_norm": 0.87491375207901,
"learning_rate": 3.1390007500246236e-07,
"loss": 0.576,
"step": 14048
},
{
"epoch": 0.89,
"grad_norm": 0.9551854729652405,
"learning_rate": 3.135423699321527e-07,
"loss": 0.632,
"step": 14049
},
{
"epoch": 0.89,
"grad_norm": 0.8860867023468018,
"learning_rate": 3.131848621922717e-07,
"loss": 0.5733,
"step": 14050
},
{
"epoch": 0.89,
"grad_norm": 0.8768588900566101,
"learning_rate": 3.128275517978707e-07,
"loss": 0.5568,
"step": 14051
},
{
"epoch": 0.89,
"grad_norm": 0.9353142976760864,
"learning_rate": 3.124704387639976e-07,
"loss": 0.6497,
"step": 14052
},
{
"epoch": 0.89,
"grad_norm": 0.9081913828849792,
"learning_rate": 3.1211352310568655e-07,
"loss": 0.6053,
"step": 14053
},
{
"epoch": 0.89,
"grad_norm": 0.9168758988380432,
"learning_rate": 3.1175680483796713e-07,
"loss": 0.5753,
"step": 14054
},
{
"epoch": 0.89,
"grad_norm": 0.8890372514724731,
"learning_rate": 3.1140028397585953e-07,
"loss": 0.5491,
"step": 14055
},
{
"epoch": 0.89,
"grad_norm": 0.8347086906433105,
"learning_rate": 3.110439605343751e-07,
"loss": 0.5384,
"step": 14056
},
{
"epoch": 0.89,
"grad_norm": 0.9348717331886292,
"learning_rate": 3.1068783452851856e-07,
"loss": 0.6164,
"step": 14057
},
{
"epoch": 0.89,
"grad_norm": 0.8585134744644165,
"learning_rate": 3.1033190597328456e-07,
"loss": 0.6011,
"step": 14058
},
{
"epoch": 0.89,
"grad_norm": 0.843673586845398,
"learning_rate": 3.099761748836594e-07,
"loss": 0.5566,
"step": 14059
},
{
"epoch": 0.89,
"grad_norm": 0.8475186824798584,
"learning_rate": 3.0962064127462167e-07,
"loss": 0.5611,
"step": 14060
},
{
"epoch": 0.89,
"grad_norm": 0.8992692232131958,
"learning_rate": 3.092653051611427e-07,
"loss": 0.5382,
"step": 14061
},
{
"epoch": 0.89,
"grad_norm": 0.8602596521377563,
"learning_rate": 3.089101665581834e-07,
"loss": 0.6269,
"step": 14062
},
{
"epoch": 0.89,
"grad_norm": 0.8721755146980286,
"learning_rate": 3.085552254806978e-07,
"loss": 0.5552,
"step": 14063
},
{
"epoch": 0.89,
"grad_norm": 0.9432767629623413,
"learning_rate": 3.0820048194363183e-07,
"loss": 0.6001,
"step": 14064
},
{
"epoch": 0.89,
"grad_norm": 0.883188009262085,
"learning_rate": 3.0784593596192123e-07,
"loss": 0.5695,
"step": 14065
},
{
"epoch": 0.89,
"grad_norm": 0.8545171022415161,
"learning_rate": 3.074915875504969e-07,
"loss": 0.5165,
"step": 14066
},
{
"epoch": 0.89,
"grad_norm": 0.782908022403717,
"learning_rate": 3.0713743672427686e-07,
"loss": 0.4764,
"step": 14067
},
{
"epoch": 0.89,
"grad_norm": 0.8021277785301208,
"learning_rate": 3.067834834981731e-07,
"loss": 0.521,
"step": 14068
},
{
"epoch": 0.89,
"grad_norm": 0.8493959903717041,
"learning_rate": 3.0642972788709203e-07,
"loss": 0.5231,
"step": 14069
},
{
"epoch": 0.89,
"grad_norm": 0.8718252778053284,
"learning_rate": 3.060761699059267e-07,
"loss": 0.562,
"step": 14070
},
{
"epoch": 0.89,
"grad_norm": 0.9048642516136169,
"learning_rate": 3.057228095695647e-07,
"loss": 0.6242,
"step": 14071
},
{
"epoch": 0.89,
"grad_norm": 0.8449458479881287,
"learning_rate": 3.053696468928857e-07,
"loss": 0.5006,
"step": 14072
},
{
"epoch": 0.89,
"grad_norm": 0.901394248008728,
"learning_rate": 3.0501668189075794e-07,
"loss": 0.5834,
"step": 14073
},
{
"epoch": 0.89,
"grad_norm": 0.9242204427719116,
"learning_rate": 3.0466391457804666e-07,
"loss": 0.5403,
"step": 14074
},
{
"epoch": 0.89,
"grad_norm": 0.8967319130897522,
"learning_rate": 3.0431134496960333e-07,
"loss": 0.6035,
"step": 14075
},
{
"epoch": 0.89,
"grad_norm": 0.8391687870025635,
"learning_rate": 3.0395897308027443e-07,
"loss": 0.5579,
"step": 14076
},
{
"epoch": 0.89,
"grad_norm": 0.9151217937469482,
"learning_rate": 3.0360679892489643e-07,
"loss": 0.5881,
"step": 14077
},
{
"epoch": 0.89,
"grad_norm": 0.8429851531982422,
"learning_rate": 3.032548225182985e-07,
"loss": 0.5864,
"step": 14078
},
{
"epoch": 0.89,
"grad_norm": 0.8832830786705017,
"learning_rate": 3.029030438753017e-07,
"loss": 0.5989,
"step": 14079
},
{
"epoch": 0.89,
"grad_norm": 0.859230101108551,
"learning_rate": 3.025514630107179e-07,
"loss": 0.5483,
"step": 14080
},
{
"epoch": 0.89,
"grad_norm": 0.8875642418861389,
"learning_rate": 3.0220007993934987e-07,
"loss": 0.6254,
"step": 14081
},
{
"epoch": 0.89,
"grad_norm": 0.8424782156944275,
"learning_rate": 3.018488946759951e-07,
"loss": 0.5502,
"step": 14082
},
{
"epoch": 0.89,
"grad_norm": 0.8858059644699097,
"learning_rate": 3.01497907235439e-07,
"loss": 0.5659,
"step": 14083
},
{
"epoch": 0.89,
"grad_norm": 0.8075915575027466,
"learning_rate": 3.0114711763246096e-07,
"loss": 0.53,
"step": 14084
},
{
"epoch": 0.89,
"grad_norm": 0.8604494333267212,
"learning_rate": 3.007965258818324e-07,
"loss": 0.6075,
"step": 14085
},
{
"epoch": 0.89,
"grad_norm": 0.9078390002250671,
"learning_rate": 3.0044613199831373e-07,
"loss": 0.5984,
"step": 14086
},
{
"epoch": 0.89,
"grad_norm": 0.9238450527191162,
"learning_rate": 3.0009593599666044e-07,
"loss": 0.6284,
"step": 14087
},
{
"epoch": 0.89,
"grad_norm": 0.9430950284004211,
"learning_rate": 2.9974593789161843e-07,
"loss": 0.5938,
"step": 14088
},
{
"epoch": 0.89,
"grad_norm": 0.9374951720237732,
"learning_rate": 2.9939613769792265e-07,
"loss": 0.5881,
"step": 14089
},
{
"epoch": 0.89,
"grad_norm": 0.8708373308181763,
"learning_rate": 2.9904653543030406e-07,
"loss": 0.6102,
"step": 14090
},
{
"epoch": 0.89,
"grad_norm": 0.926876425743103,
"learning_rate": 2.98697131103482e-07,
"loss": 0.5941,
"step": 14091
},
{
"epoch": 0.89,
"grad_norm": 0.8974210619926453,
"learning_rate": 2.983479247321691e-07,
"loss": 0.6017,
"step": 14092
},
{
"epoch": 0.89,
"grad_norm": 0.8967876434326172,
"learning_rate": 2.979989163310704e-07,
"loss": 0.5812,
"step": 14093
},
{
"epoch": 0.89,
"grad_norm": 0.914035975933075,
"learning_rate": 2.976501059148779e-07,
"loss": 0.5896,
"step": 14094
},
{
"epoch": 0.89,
"grad_norm": 0.8700651526451111,
"learning_rate": 2.9730149349828265e-07,
"loss": 0.4908,
"step": 14095
},
{
"epoch": 0.89,
"grad_norm": 0.9540930390357971,
"learning_rate": 2.969530790959624e-07,
"loss": 0.5677,
"step": 14096
},
{
"epoch": 0.89,
"grad_norm": 0.9152606725692749,
"learning_rate": 2.9660486272258703e-07,
"loss": 0.5638,
"step": 14097
},
{
"epoch": 0.89,
"grad_norm": 0.9777679443359375,
"learning_rate": 2.9625684439281875e-07,
"loss": 0.5569,
"step": 14098
},
{
"epoch": 0.89,
"grad_norm": 0.9192463159561157,
"learning_rate": 2.959090241213114e-07,
"loss": 0.608,
"step": 14099
},
{
"epoch": 0.89,
"grad_norm": 0.9115022420883179,
"learning_rate": 2.9556140192271045e-07,
"loss": 0.6048,
"step": 14100
},
{
"epoch": 0.89,
"grad_norm": 0.8501712679862976,
"learning_rate": 2.9521397781165475e-07,
"loss": 0.5831,
"step": 14101
},
{
"epoch": 0.89,
"grad_norm": 0.8686020374298096,
"learning_rate": 2.9486675180277035e-07,
"loss": 0.5704,
"step": 14102
},
{
"epoch": 0.89,
"grad_norm": 0.8605404496192932,
"learning_rate": 2.9451972391067897e-07,
"loss": 0.5463,
"step": 14103
},
{
"epoch": 0.89,
"grad_norm": 0.873845636844635,
"learning_rate": 2.941728941499938e-07,
"loss": 0.531,
"step": 14104
},
{
"epoch": 0.89,
"grad_norm": 0.934984564781189,
"learning_rate": 2.938262625353172e-07,
"loss": 0.5583,
"step": 14105
},
{
"epoch": 0.89,
"grad_norm": 0.8914570808410645,
"learning_rate": 2.934798290812446e-07,
"loss": 0.6008,
"step": 14106
},
{
"epoch": 0.89,
"grad_norm": 0.8697715401649475,
"learning_rate": 2.931335938023644e-07,
"loss": 0.5582,
"step": 14107
},
{
"epoch": 0.89,
"grad_norm": 0.9065754413604736,
"learning_rate": 2.9278755671325377e-07,
"loss": 0.5708,
"step": 14108
},
{
"epoch": 0.89,
"grad_norm": 0.8564165830612183,
"learning_rate": 2.924417178284855e-07,
"loss": 0.5854,
"step": 14109
},
{
"epoch": 0.89,
"grad_norm": 0.8233086466789246,
"learning_rate": 2.9209607716261856e-07,
"loss": 0.5431,
"step": 14110
},
{
"epoch": 0.89,
"grad_norm": 0.8898478746414185,
"learning_rate": 2.917506347302079e-07,
"loss": 0.5987,
"step": 14111
},
{
"epoch": 0.89,
"grad_norm": 0.9087411761283875,
"learning_rate": 2.9140539054580087e-07,
"loss": 0.586,
"step": 14112
},
{
"epoch": 0.89,
"grad_norm": 0.8777049779891968,
"learning_rate": 2.9106034462393187e-07,
"loss": 0.534,
"step": 14113
},
{
"epoch": 0.89,
"grad_norm": 0.8712142109870911,
"learning_rate": 2.9071549697913035e-07,
"loss": 0.564,
"step": 14114
},
{
"epoch": 0.89,
"grad_norm": 0.8860015869140625,
"learning_rate": 2.9037084762591704e-07,
"loss": 0.492,
"step": 14115
},
{
"epoch": 0.89,
"grad_norm": 0.9180863499641418,
"learning_rate": 2.900263965788036e-07,
"loss": 0.6046,
"step": 14116
},
{
"epoch": 0.89,
"grad_norm": 0.8687685132026672,
"learning_rate": 2.8968214385229453e-07,
"loss": 0.5626,
"step": 14117
},
{
"epoch": 0.89,
"grad_norm": 0.9041091203689575,
"learning_rate": 2.8933808946088383e-07,
"loss": 0.5691,
"step": 14118
},
{
"epoch": 0.89,
"grad_norm": 0.9091986417770386,
"learning_rate": 2.889942334190593e-07,
"loss": 0.5794,
"step": 14119
},
{
"epoch": 0.89,
"grad_norm": 0.8923031687736511,
"learning_rate": 2.8865057574129883e-07,
"loss": 0.5731,
"step": 14120
},
{
"epoch": 0.89,
"grad_norm": 0.8908477425575256,
"learning_rate": 2.8830711644207257e-07,
"loss": 0.5569,
"step": 14121
},
{
"epoch": 0.89,
"grad_norm": 1.0278847217559814,
"learning_rate": 2.8796385553584326e-07,
"loss": 0.59,
"step": 14122
},
{
"epoch": 0.89,
"grad_norm": 0.8459283709526062,
"learning_rate": 2.8762079303706505e-07,
"loss": 0.5899,
"step": 14123
},
{
"epoch": 0.89,
"grad_norm": 0.8664804100990295,
"learning_rate": 2.8727792896018015e-07,
"loss": 0.56,
"step": 14124
},
{
"epoch": 0.89,
"grad_norm": 0.8533109426498413,
"learning_rate": 2.8693526331962875e-07,
"loss": 0.5472,
"step": 14125
},
{
"epoch": 0.89,
"grad_norm": 0.8533556461334229,
"learning_rate": 2.865927961298376e-07,
"loss": 0.5552,
"step": 14126
},
{
"epoch": 0.9,
"grad_norm": 0.8829198479652405,
"learning_rate": 2.8625052740522683e-07,
"loss": 0.5775,
"step": 14127
},
{
"epoch": 0.9,
"grad_norm": 0.92991703748703,
"learning_rate": 2.859084571602083e-07,
"loss": 0.5723,
"step": 14128
},
{
"epoch": 0.9,
"grad_norm": 0.9061645269393921,
"learning_rate": 2.8556658540918603e-07,
"loss": 0.5473,
"step": 14129
},
{
"epoch": 0.9,
"grad_norm": 0.8385295867919922,
"learning_rate": 2.8522491216655403e-07,
"loss": 0.5513,
"step": 14130
},
{
"epoch": 0.9,
"grad_norm": 0.8606228828430176,
"learning_rate": 2.848834374467002e-07,
"loss": 0.5805,
"step": 14131
},
{
"epoch": 0.9,
"grad_norm": 0.9587467908859253,
"learning_rate": 2.8454216126400146e-07,
"loss": 0.6116,
"step": 14132
},
{
"epoch": 0.9,
"grad_norm": 0.9845794439315796,
"learning_rate": 2.842010836328274e-07,
"loss": 0.6159,
"step": 14133
},
{
"epoch": 0.9,
"grad_norm": 0.8591241240501404,
"learning_rate": 2.838602045675426e-07,
"loss": 0.5634,
"step": 14134
},
{
"epoch": 0.9,
"grad_norm": 0.916466236114502,
"learning_rate": 2.8351952408249726e-07,
"loss": 0.5489,
"step": 14135
},
{
"epoch": 0.9,
"grad_norm": 0.851662278175354,
"learning_rate": 2.831790421920377e-07,
"loss": 0.5919,
"step": 14136
},
{
"epoch": 0.9,
"grad_norm": 0.8946172595024109,
"learning_rate": 2.828387589104997e-07,
"loss": 0.6006,
"step": 14137
},
{
"epoch": 0.9,
"grad_norm": 0.8727723360061646,
"learning_rate": 2.824986742522118e-07,
"loss": 0.5747,
"step": 14138
},
{
"epoch": 0.9,
"grad_norm": 0.9003124237060547,
"learning_rate": 2.8215878823149466e-07,
"loss": 0.6208,
"step": 14139
},
{
"epoch": 0.9,
"grad_norm": 0.9446995258331299,
"learning_rate": 2.818191008626581e-07,
"loss": 0.5331,
"step": 14140
},
{
"epoch": 0.9,
"grad_norm": 0.8727278709411621,
"learning_rate": 2.8147961216000497e-07,
"loss": 0.5418,
"step": 14141
},
{
"epoch": 0.9,
"grad_norm": 0.8375770449638367,
"learning_rate": 2.8114032213783226e-07,
"loss": 0.5109,
"step": 14142
},
{
"epoch": 0.9,
"grad_norm": 0.8434416055679321,
"learning_rate": 2.808012308104241e-07,
"loss": 0.6113,
"step": 14143
},
{
"epoch": 0.9,
"grad_norm": 0.8837141394615173,
"learning_rate": 2.80462338192059e-07,
"loss": 0.5899,
"step": 14144
},
{
"epoch": 0.9,
"grad_norm": 0.8532682657241821,
"learning_rate": 2.801236442970073e-07,
"loss": 0.5329,
"step": 14145
},
{
"epoch": 0.9,
"grad_norm": 0.8666834831237793,
"learning_rate": 2.797851491395293e-07,
"loss": 0.5825,
"step": 14146
},
{
"epoch": 0.9,
"grad_norm": 0.9193355441093445,
"learning_rate": 2.79446852733879e-07,
"loss": 0.6144,
"step": 14147
},
{
"epoch": 0.9,
"grad_norm": 0.862277090549469,
"learning_rate": 2.791087550942995e-07,
"loss": 0.5617,
"step": 14148
},
{
"epoch": 0.9,
"grad_norm": 0.8777857422828674,
"learning_rate": 2.7877085623502775e-07,
"loss": 0.5741,
"step": 14149
},
{
"epoch": 0.9,
"grad_norm": 0.9369240999221802,
"learning_rate": 2.784331561702908e-07,
"loss": 0.5701,
"step": 14150
},
{
"epoch": 0.9,
"grad_norm": 0.8633100986480713,
"learning_rate": 2.780956549143088e-07,
"loss": 0.5204,
"step": 14151
},
{
"epoch": 0.9,
"grad_norm": 0.8361502289772034,
"learning_rate": 2.7775835248129267e-07,
"loss": 0.5389,
"step": 14152
},
{
"epoch": 0.9,
"grad_norm": 0.9618591070175171,
"learning_rate": 2.7742124888544497e-07,
"loss": 0.5818,
"step": 14153
},
{
"epoch": 0.9,
"grad_norm": 0.8982853889465332,
"learning_rate": 2.7708434414095875e-07,
"loss": 0.5917,
"step": 14154
},
{
"epoch": 0.9,
"grad_norm": 0.9148767590522766,
"learning_rate": 2.7674763826202265e-07,
"loss": 0.5946,
"step": 14155
},
{
"epoch": 0.9,
"grad_norm": 0.8875370621681213,
"learning_rate": 2.764111312628115e-07,
"loss": 0.6017,
"step": 14156
},
{
"epoch": 0.9,
"grad_norm": 0.9090349078178406,
"learning_rate": 2.7607482315749554e-07,
"loss": 0.5424,
"step": 14157
},
{
"epoch": 0.9,
"grad_norm": 0.9224393963813782,
"learning_rate": 2.757387139602352e-07,
"loss": 0.5548,
"step": 14158
},
{
"epoch": 0.9,
"grad_norm": 0.8845816850662231,
"learning_rate": 2.754028036851836e-07,
"loss": 0.5169,
"step": 14159
},
{
"epoch": 0.9,
"grad_norm": 0.880143404006958,
"learning_rate": 2.750670923464838e-07,
"loss": 0.5661,
"step": 14160
},
{
"epoch": 0.9,
"grad_norm": 0.9638619422912598,
"learning_rate": 2.747315799582728e-07,
"loss": 0.5205,
"step": 14161
},
{
"epoch": 0.9,
"grad_norm": 0.87690269947052,
"learning_rate": 2.7439626653467555e-07,
"loss": 0.6158,
"step": 14162
},
{
"epoch": 0.9,
"grad_norm": 0.9064611196517944,
"learning_rate": 2.7406115208981345e-07,
"loss": 0.5675,
"step": 14163
},
{
"epoch": 0.9,
"grad_norm": 0.9225680232048035,
"learning_rate": 2.7372623663779575e-07,
"loss": 0.5312,
"step": 14164
},
{
"epoch": 0.9,
"grad_norm": 0.8610286116600037,
"learning_rate": 2.733915201927245e-07,
"loss": 0.563,
"step": 14165
},
{
"epoch": 0.9,
"grad_norm": 0.8890798091888428,
"learning_rate": 2.7305700276869406e-07,
"loss": 0.5899,
"step": 14166
},
{
"epoch": 0.9,
"grad_norm": 0.8515585660934448,
"learning_rate": 2.727226843797881e-07,
"loss": 0.5643,
"step": 14167
},
{
"epoch": 0.9,
"grad_norm": 0.8853866457939148,
"learning_rate": 2.7238856504008594e-07,
"loss": 0.6033,
"step": 14168
},
{
"epoch": 0.9,
"grad_norm": 0.8813034296035767,
"learning_rate": 2.7205464476365575e-07,
"loss": 0.5382,
"step": 14169
},
{
"epoch": 0.9,
"grad_norm": 0.9416490197181702,
"learning_rate": 2.7172092356455626e-07,
"loss": 0.6018,
"step": 14170
},
{
"epoch": 0.9,
"grad_norm": 0.9497674703598022,
"learning_rate": 2.7138740145684017e-07,
"loss": 0.6127,
"step": 14171
},
{
"epoch": 0.9,
"grad_norm": 0.9365571737289429,
"learning_rate": 2.7105407845455124e-07,
"loss": 0.631,
"step": 14172
},
{
"epoch": 0.9,
"grad_norm": 0.8589094877243042,
"learning_rate": 2.707209545717238e-07,
"loss": 0.5762,
"step": 14173
},
{
"epoch": 0.9,
"grad_norm": 0.8943716287612915,
"learning_rate": 2.70388029822386e-07,
"loss": 0.5679,
"step": 14174
},
{
"epoch": 0.9,
"grad_norm": 0.9539296627044678,
"learning_rate": 2.700553042205539e-07,
"loss": 0.5642,
"step": 14175
},
{
"epoch": 0.9,
"grad_norm": 0.8620119690895081,
"learning_rate": 2.6972277778023913e-07,
"loss": 0.5144,
"step": 14176
},
{
"epoch": 0.9,
"grad_norm": 0.8798508644104004,
"learning_rate": 2.693904505154432e-07,
"loss": 0.5847,
"step": 14177
},
{
"epoch": 0.9,
"grad_norm": 0.9159492254257202,
"learning_rate": 2.690583224401588e-07,
"loss": 0.5889,
"step": 14178
},
{
"epoch": 0.9,
"grad_norm": 0.8451624512672424,
"learning_rate": 2.687263935683704e-07,
"loss": 0.5517,
"step": 14179
},
{
"epoch": 0.9,
"grad_norm": 0.9503071308135986,
"learning_rate": 2.6839466391405444e-07,
"loss": 0.6165,
"step": 14180
},
{
"epoch": 0.9,
"grad_norm": 0.8462880849838257,
"learning_rate": 2.680631334911793e-07,
"loss": 0.5567,
"step": 14181
},
{
"epoch": 0.9,
"grad_norm": 0.94367516040802,
"learning_rate": 2.677318023137049e-07,
"loss": 0.6164,
"step": 14182
},
{
"epoch": 0.9,
"grad_norm": 0.9024264216423035,
"learning_rate": 2.674006703955817e-07,
"loss": 0.5862,
"step": 14183
},
{
"epoch": 0.9,
"grad_norm": 0.9006355404853821,
"learning_rate": 2.670697377507514e-07,
"loss": 0.5654,
"step": 14184
},
{
"epoch": 0.9,
"grad_norm": 0.8944267630577087,
"learning_rate": 2.667390043931517e-07,
"loss": 0.5717,
"step": 14185
},
{
"epoch": 0.9,
"grad_norm": 0.8411933779716492,
"learning_rate": 2.664084703367059e-07,
"loss": 0.5411,
"step": 14186
},
{
"epoch": 0.9,
"grad_norm": 0.9336392283439636,
"learning_rate": 2.6607813559533236e-07,
"loss": 0.5433,
"step": 14187
},
{
"epoch": 0.9,
"grad_norm": 0.8673104643821716,
"learning_rate": 2.6574800018294043e-07,
"loss": 0.5634,
"step": 14188
},
{
"epoch": 0.9,
"grad_norm": 0.9110936522483826,
"learning_rate": 2.654180641134313e-07,
"loss": 0.5422,
"step": 14189
},
{
"epoch": 0.9,
"grad_norm": 0.8549519777297974,
"learning_rate": 2.650883274006966e-07,
"loss": 0.5758,
"step": 14190
},
{
"epoch": 0.9,
"grad_norm": 0.9573348164558411,
"learning_rate": 2.6475879005862183e-07,
"loss": 0.6086,
"step": 14191
},
{
"epoch": 0.9,
"grad_norm": 0.8656295537948608,
"learning_rate": 2.644294521010804e-07,
"loss": 0.5346,
"step": 14192
},
{
"epoch": 0.9,
"grad_norm": 0.9295397996902466,
"learning_rate": 2.6410031354194175e-07,
"loss": 0.6098,
"step": 14193
},
{
"epoch": 0.9,
"grad_norm": 0.8475077748298645,
"learning_rate": 2.6377137439506373e-07,
"loss": 0.5531,
"step": 14194
},
{
"epoch": 0.9,
"grad_norm": 0.9059809446334839,
"learning_rate": 2.634426346742969e-07,
"loss": 0.5737,
"step": 14195
},
{
"epoch": 0.9,
"grad_norm": 0.9273040890693665,
"learning_rate": 2.6311409439348403e-07,
"loss": 0.5759,
"step": 14196
},
{
"epoch": 0.9,
"grad_norm": 0.8897231221199036,
"learning_rate": 2.6278575356645687e-07,
"loss": 0.5843,
"step": 14197
},
{
"epoch": 0.9,
"grad_norm": 0.9147869348526001,
"learning_rate": 2.624576122070427e-07,
"loss": 0.6142,
"step": 14198
},
{
"epoch": 0.9,
"grad_norm": 0.8592173457145691,
"learning_rate": 2.621296703290588e-07,
"loss": 0.5259,
"step": 14199
},
{
"epoch": 0.9,
"grad_norm": 0.8588521480560303,
"learning_rate": 2.6180192794631133e-07,
"loss": 0.5644,
"step": 14200
},
{
"epoch": 0.9,
"grad_norm": 0.913691520690918,
"learning_rate": 2.6147438507260205e-07,
"loss": 0.5955,
"step": 14201
},
{
"epoch": 0.9,
"grad_norm": 0.9359204769134521,
"learning_rate": 2.611470417217227e-07,
"loss": 0.563,
"step": 14202
},
{
"epoch": 0.9,
"grad_norm": 0.8501827120780945,
"learning_rate": 2.6081989790745554e-07,
"loss": 0.553,
"step": 14203
},
{
"epoch": 0.9,
"grad_norm": 0.8793197274208069,
"learning_rate": 2.6049295364357684e-07,
"loss": 0.6034,
"step": 14204
},
{
"epoch": 0.9,
"grad_norm": 0.8312693238258362,
"learning_rate": 2.6016620894385113e-07,
"loss": 0.5353,
"step": 14205
},
{
"epoch": 0.9,
"grad_norm": 0.8711232542991638,
"learning_rate": 2.59839663822038e-07,
"loss": 0.5567,
"step": 14206
},
{
"epoch": 0.9,
"grad_norm": 0.9392272233963013,
"learning_rate": 2.5951331829188797e-07,
"loss": 0.5764,
"step": 14207
},
{
"epoch": 0.9,
"grad_norm": 0.8943392634391785,
"learning_rate": 2.591871723671402e-07,
"loss": 0.5447,
"step": 14208
},
{
"epoch": 0.9,
"grad_norm": 0.8029001355171204,
"learning_rate": 2.5886122606152866e-07,
"loss": 0.5799,
"step": 14209
},
{
"epoch": 0.9,
"grad_norm": 0.8463373780250549,
"learning_rate": 2.585354793887779e-07,
"loss": 0.5161,
"step": 14210
},
{
"epoch": 0.9,
"grad_norm": 0.9136335849761963,
"learning_rate": 2.5820993236260305e-07,
"loss": 0.5433,
"step": 14211
},
{
"epoch": 0.9,
"grad_norm": 0.8141673803329468,
"learning_rate": 2.5788458499671376e-07,
"loss": 0.5733,
"step": 14212
},
{
"epoch": 0.9,
"grad_norm": 0.9717278480529785,
"learning_rate": 2.5755943730480735e-07,
"loss": 0.5817,
"step": 14213
},
{
"epoch": 0.9,
"grad_norm": 0.91008460521698,
"learning_rate": 2.5723448930057405e-07,
"loss": 0.6328,
"step": 14214
},
{
"epoch": 0.9,
"grad_norm": 0.9435662031173706,
"learning_rate": 2.569097409976995e-07,
"loss": 0.5827,
"step": 14215
},
{
"epoch": 0.9,
"grad_norm": 0.8856955766677856,
"learning_rate": 2.5658519240985444e-07,
"loss": 0.5993,
"step": 14216
},
{
"epoch": 0.9,
"grad_norm": 0.8379449248313904,
"learning_rate": 2.5626084355070634e-07,
"loss": 0.596,
"step": 14217
},
{
"epoch": 0.9,
"grad_norm": 0.8931264281272888,
"learning_rate": 2.5593669443391145e-07,
"loss": 0.6241,
"step": 14218
},
{
"epoch": 0.9,
"grad_norm": 1.0004993677139282,
"learning_rate": 2.556127450731194e-07,
"loss": 0.6224,
"step": 14219
},
{
"epoch": 0.9,
"grad_norm": 0.9444043040275574,
"learning_rate": 2.552889954819704e-07,
"loss": 0.56,
"step": 14220
},
{
"epoch": 0.9,
"grad_norm": 0.9467916488647461,
"learning_rate": 2.5496544567409577e-07,
"loss": 0.593,
"step": 14221
},
{
"epoch": 0.9,
"grad_norm": 0.9097285866737366,
"learning_rate": 2.5464209566311847e-07,
"loss": 0.5677,
"step": 14222
},
{
"epoch": 0.9,
"grad_norm": 1.0071593523025513,
"learning_rate": 2.5431894546265654e-07,
"loss": 0.6015,
"step": 14223
},
{
"epoch": 0.9,
"grad_norm": 0.8538757562637329,
"learning_rate": 2.5399599508631356e-07,
"loss": 0.5435,
"step": 14224
},
{
"epoch": 0.9,
"grad_norm": 0.8897154331207275,
"learning_rate": 2.5367324454768916e-07,
"loss": 0.6174,
"step": 14225
},
{
"epoch": 0.9,
"grad_norm": 0.8830700516700745,
"learning_rate": 2.5335069386037414e-07,
"loss": 0.604,
"step": 14226
},
{
"epoch": 0.9,
"grad_norm": 0.8626001477241516,
"learning_rate": 2.530283430379471e-07,
"loss": 0.5986,
"step": 14227
},
{
"epoch": 0.9,
"grad_norm": 0.8587076663970947,
"learning_rate": 2.5270619209398497e-07,
"loss": 0.5063,
"step": 14228
},
{
"epoch": 0.9,
"grad_norm": 0.9192159175872803,
"learning_rate": 2.523842410420496e-07,
"loss": 0.6178,
"step": 14229
},
{
"epoch": 0.9,
"grad_norm": 0.9127901196479797,
"learning_rate": 2.5206248989569803e-07,
"loss": 0.5617,
"step": 14230
},
{
"epoch": 0.9,
"grad_norm": 0.9163671135902405,
"learning_rate": 2.5174093866847826e-07,
"loss": 0.5319,
"step": 14231
},
{
"epoch": 0.9,
"grad_norm": 0.8983326554298401,
"learning_rate": 2.5141958737392947e-07,
"loss": 0.5889,
"step": 14232
},
{
"epoch": 0.9,
"grad_norm": 0.9140615463256836,
"learning_rate": 2.5109843602558247e-07,
"loss": 0.5825,
"step": 14233
},
{
"epoch": 0.9,
"grad_norm": 0.837908148765564,
"learning_rate": 2.507774846369615e-07,
"loss": 0.5451,
"step": 14234
},
{
"epoch": 0.9,
"grad_norm": 0.9031140804290771,
"learning_rate": 2.5045673322157735e-07,
"loss": 0.5876,
"step": 14235
},
{
"epoch": 0.9,
"grad_norm": 0.8454420566558838,
"learning_rate": 2.501361817929393e-07,
"loss": 0.5165,
"step": 14236
},
{
"epoch": 0.9,
"grad_norm": 0.8986586332321167,
"learning_rate": 2.4981583036454203e-07,
"loss": 0.554,
"step": 14237
},
{
"epoch": 0.9,
"grad_norm": 0.8993757367134094,
"learning_rate": 2.494956789498759e-07,
"loss": 0.5715,
"step": 14238
},
{
"epoch": 0.9,
"grad_norm": 0.9246693253517151,
"learning_rate": 2.491757275624207e-07,
"loss": 0.5542,
"step": 14239
},
{
"epoch": 0.9,
"grad_norm": 0.961254894733429,
"learning_rate": 2.4885597621564896e-07,
"loss": 0.6091,
"step": 14240
},
{
"epoch": 0.9,
"grad_norm": 0.9174337387084961,
"learning_rate": 2.485364249230238e-07,
"loss": 0.5664,
"step": 14241
},
{
"epoch": 0.9,
"grad_norm": 0.824385941028595,
"learning_rate": 2.4821707369800163e-07,
"loss": 0.5478,
"step": 14242
},
{
"epoch": 0.9,
"grad_norm": 0.868877649307251,
"learning_rate": 2.478979225540268e-07,
"loss": 0.5719,
"step": 14243
},
{
"epoch": 0.9,
"grad_norm": 0.9093589782714844,
"learning_rate": 2.475789715045401e-07,
"loss": 0.5828,
"step": 14244
},
{
"epoch": 0.9,
"grad_norm": 0.881280243396759,
"learning_rate": 2.472602205629698e-07,
"loss": 0.5403,
"step": 14245
},
{
"epoch": 0.9,
"grad_norm": 0.9545583128929138,
"learning_rate": 2.469416697427379e-07,
"loss": 0.5959,
"step": 14246
},
{
"epoch": 0.9,
"grad_norm": 0.9496628642082214,
"learning_rate": 2.466233190572581e-07,
"loss": 0.5762,
"step": 14247
},
{
"epoch": 0.9,
"grad_norm": 0.8831350803375244,
"learning_rate": 2.463051685199341e-07,
"loss": 0.6048,
"step": 14248
},
{
"epoch": 0.9,
"grad_norm": 0.9093460440635681,
"learning_rate": 2.4598721814416306e-07,
"loss": 0.5595,
"step": 14249
},
{
"epoch": 0.9,
"grad_norm": 0.8506335616111755,
"learning_rate": 2.4566946794333247e-07,
"loss": 0.579,
"step": 14250
},
{
"epoch": 0.9,
"grad_norm": 0.9103783369064331,
"learning_rate": 2.4535191793082116e-07,
"loss": 0.5803,
"step": 14251
},
{
"epoch": 0.9,
"grad_norm": 0.8295513391494751,
"learning_rate": 2.4503456812e-07,
"loss": 0.5156,
"step": 14252
},
{
"epoch": 0.9,
"grad_norm": 0.956263542175293,
"learning_rate": 2.447174185242324e-07,
"loss": 0.5824,
"step": 14253
},
{
"epoch": 0.9,
"grad_norm": 0.8482615947723389,
"learning_rate": 2.4440046915687135e-07,
"loss": 0.531,
"step": 14254
},
{
"epoch": 0.9,
"grad_norm": 0.8663813471794128,
"learning_rate": 2.4408372003126345e-07,
"loss": 0.6005,
"step": 14255
},
{
"epoch": 0.9,
"grad_norm": 0.9090369939804077,
"learning_rate": 2.4376717116074533e-07,
"loss": 0.5589,
"step": 14256
},
{
"epoch": 0.9,
"grad_norm": 0.803523600101471,
"learning_rate": 2.434508225586457e-07,
"loss": 0.5677,
"step": 14257
},
{
"epoch": 0.9,
"grad_norm": 0.8888107538223267,
"learning_rate": 2.431346742382856e-07,
"loss": 0.5486,
"step": 14258
},
{
"epoch": 0.9,
"grad_norm": 0.874443769454956,
"learning_rate": 2.428187262129761e-07,
"loss": 0.5408,
"step": 14259
},
{
"epoch": 0.9,
"grad_norm": 0.8557073473930359,
"learning_rate": 2.4250297849602145e-07,
"loss": 0.6157,
"step": 14260
},
{
"epoch": 0.9,
"grad_norm": 0.890663743019104,
"learning_rate": 2.421874311007155e-07,
"loss": 0.5452,
"step": 14261
},
{
"epoch": 0.9,
"grad_norm": 0.9649395942687988,
"learning_rate": 2.41872084040346e-07,
"loss": 0.5703,
"step": 14262
},
{
"epoch": 0.9,
"grad_norm": 0.8820181488990784,
"learning_rate": 2.4155693732819065e-07,
"loss": 0.5555,
"step": 14263
},
{
"epoch": 0.9,
"grad_norm": 0.9557092189788818,
"learning_rate": 2.412419909775199e-07,
"loss": 0.5643,
"step": 14264
},
{
"epoch": 0.9,
"grad_norm": 0.8582893013954163,
"learning_rate": 2.4092724500159315e-07,
"loss": 0.5908,
"step": 14265
},
{
"epoch": 0.9,
"grad_norm": 0.8829105496406555,
"learning_rate": 2.406126994136654e-07,
"loss": 0.5481,
"step": 14266
},
{
"epoch": 0.9,
"grad_norm": 0.8586880564689636,
"learning_rate": 2.402983542269799e-07,
"loss": 0.5469,
"step": 14267
},
{
"epoch": 0.9,
"grad_norm": 0.9591865539550781,
"learning_rate": 2.3998420945477276e-07,
"loss": 0.5816,
"step": 14268
},
{
"epoch": 0.9,
"grad_norm": 0.8931147456169128,
"learning_rate": 2.3967026511027224e-07,
"loss": 0.6198,
"step": 14269
},
{
"epoch": 0.9,
"grad_norm": 0.8495928645133972,
"learning_rate": 2.393565212066962e-07,
"loss": 0.5475,
"step": 14270
},
{
"epoch": 0.9,
"grad_norm": 0.8455556035041809,
"learning_rate": 2.3904297775725614e-07,
"loss": 0.5551,
"step": 14271
},
{
"epoch": 0.9,
"grad_norm": 0.8738865256309509,
"learning_rate": 2.3872963477515497e-07,
"loss": 0.5653,
"step": 14272
},
{
"epoch": 0.9,
"grad_norm": 0.89951092004776,
"learning_rate": 2.3841649227358489e-07,
"loss": 0.556,
"step": 14273
},
{
"epoch": 0.9,
"grad_norm": 0.8624039888381958,
"learning_rate": 2.3810355026573195e-07,
"loss": 0.5726,
"step": 14274
},
{
"epoch": 0.9,
"grad_norm": 0.9102184176445007,
"learning_rate": 2.377908087647729e-07,
"loss": 0.5875,
"step": 14275
},
{
"epoch": 0.9,
"grad_norm": 0.8978198766708374,
"learning_rate": 2.374782677838766e-07,
"loss": 0.5379,
"step": 14276
},
{
"epoch": 0.9,
"grad_norm": 0.8829779028892517,
"learning_rate": 2.3716592733620315e-07,
"loss": 0.6237,
"step": 14277
},
{
"epoch": 0.9,
"grad_norm": 0.9458640217781067,
"learning_rate": 2.3685378743490306e-07,
"loss": 0.5471,
"step": 14278
},
{
"epoch": 0.9,
"grad_norm": 0.9152674078941345,
"learning_rate": 2.3654184809312032e-07,
"loss": 0.563,
"step": 14279
},
{
"epoch": 0.9,
"grad_norm": 0.7956060171127319,
"learning_rate": 2.362301093239905e-07,
"loss": 0.514,
"step": 14280
},
{
"epoch": 0.9,
"grad_norm": 0.8902352452278137,
"learning_rate": 2.359185711406381e-07,
"loss": 0.5497,
"step": 14281
},
{
"epoch": 0.9,
"grad_norm": 0.8263982534408569,
"learning_rate": 2.3560723355618152e-07,
"loss": 0.5835,
"step": 14282
},
{
"epoch": 0.9,
"grad_norm": 0.8512941002845764,
"learning_rate": 2.3529609658373032e-07,
"loss": 0.5797,
"step": 14283
},
{
"epoch": 0.9,
"grad_norm": 0.9477369785308838,
"learning_rate": 2.3498516023638562e-07,
"loss": 0.5849,
"step": 14284
},
{
"epoch": 0.91,
"grad_norm": 0.9041728377342224,
"learning_rate": 2.3467442452723976e-07,
"loss": 0.5284,
"step": 14285
},
{
"epoch": 0.91,
"grad_norm": 0.9231773614883423,
"learning_rate": 2.3436388946937504e-07,
"loss": 0.533,
"step": 14286
},
{
"epoch": 0.91,
"grad_norm": 0.8928778171539307,
"learning_rate": 2.3405355507586992e-07,
"loss": 0.5975,
"step": 14287
},
{
"epoch": 0.91,
"grad_norm": 0.8574181795120239,
"learning_rate": 2.3374342135979e-07,
"loss": 0.5832,
"step": 14288
},
{
"epoch": 0.91,
"grad_norm": 0.900062620639801,
"learning_rate": 2.3343348833419377e-07,
"loss": 0.6058,
"step": 14289
},
{
"epoch": 0.91,
"grad_norm": 0.9051018953323364,
"learning_rate": 2.3312375601213134e-07,
"loss": 0.5408,
"step": 14290
},
{
"epoch": 0.91,
"grad_norm": 0.9527705311775208,
"learning_rate": 2.3281422440664503e-07,
"loss": 0.5875,
"step": 14291
},
{
"epoch": 0.91,
"grad_norm": 0.9164147973060608,
"learning_rate": 2.3250489353076777e-07,
"loss": 0.5846,
"step": 14292
},
{
"epoch": 0.91,
"grad_norm": 0.9179802536964417,
"learning_rate": 2.3219576339752525e-07,
"loss": 0.58,
"step": 14293
},
{
"epoch": 0.91,
"grad_norm": 0.9129411578178406,
"learning_rate": 2.3188683401993261e-07,
"loss": 0.5996,
"step": 14294
},
{
"epoch": 0.91,
"grad_norm": 0.8692997097969055,
"learning_rate": 2.3157810541099724e-07,
"loss": 0.5227,
"step": 14295
},
{
"epoch": 0.91,
"grad_norm": 0.8740088939666748,
"learning_rate": 2.3126957758372149e-07,
"loss": 0.5473,
"step": 14296
},
{
"epoch": 0.91,
"grad_norm": 0.908848762512207,
"learning_rate": 2.3096125055109386e-07,
"loss": 0.543,
"step": 14297
},
{
"epoch": 0.91,
"grad_norm": 0.8550407290458679,
"learning_rate": 2.3065312432609788e-07,
"loss": 0.557,
"step": 14298
},
{
"epoch": 0.91,
"grad_norm": 0.852614164352417,
"learning_rate": 2.3034519892170705e-07,
"loss": 0.5691,
"step": 14299
},
{
"epoch": 0.91,
"grad_norm": 0.8640037775039673,
"learning_rate": 2.3003747435088764e-07,
"loss": 0.5388,
"step": 14300
},
{
"epoch": 0.91,
"grad_norm": 0.8585585355758667,
"learning_rate": 2.2972995062659764e-07,
"loss": 0.5929,
"step": 14301
},
{
"epoch": 0.91,
"grad_norm": 0.9112229943275452,
"learning_rate": 2.2942262776178392e-07,
"loss": 0.5954,
"step": 14302
},
{
"epoch": 0.91,
"grad_norm": 0.8441083431243896,
"learning_rate": 2.291155057693878e-07,
"loss": 0.571,
"step": 14303
},
{
"epoch": 0.91,
"grad_norm": 0.922584593296051,
"learning_rate": 2.2880858466234114e-07,
"loss": 0.5756,
"step": 14304
},
{
"epoch": 0.91,
"grad_norm": 0.860567569732666,
"learning_rate": 2.2850186445356693e-07,
"loss": 0.5948,
"step": 14305
},
{
"epoch": 0.91,
"grad_norm": 0.8519131541252136,
"learning_rate": 2.281953451559804e-07,
"loss": 0.5374,
"step": 14306
},
{
"epoch": 0.91,
"grad_norm": 0.9286757111549377,
"learning_rate": 2.2788902678248904e-07,
"loss": 0.6002,
"step": 14307
},
{
"epoch": 0.91,
"grad_norm": 0.8797339797019958,
"learning_rate": 2.2758290934598805e-07,
"loss": 0.5811,
"step": 14308
},
{
"epoch": 0.91,
"grad_norm": 0.871191680431366,
"learning_rate": 2.2727699285937043e-07,
"loss": 0.5332,
"step": 14309
},
{
"epoch": 0.91,
"grad_norm": 1.0105799436569214,
"learning_rate": 2.2697127733551483e-07,
"loss": 0.6011,
"step": 14310
},
{
"epoch": 0.91,
"grad_norm": 0.8978657722473145,
"learning_rate": 2.2666576278729424e-07,
"loss": 0.6085,
"step": 14311
},
{
"epoch": 0.91,
"grad_norm": 0.9255068302154541,
"learning_rate": 2.2636044922757339e-07,
"loss": 0.5758,
"step": 14312
},
{
"epoch": 0.91,
"grad_norm": 0.8957815170288086,
"learning_rate": 2.2605533666920753e-07,
"loss": 0.5948,
"step": 14313
},
{
"epoch": 0.91,
"grad_norm": 0.9146010279655457,
"learning_rate": 2.257504251250442e-07,
"loss": 0.5328,
"step": 14314
},
{
"epoch": 0.91,
"grad_norm": 0.8640668392181396,
"learning_rate": 2.2544571460792308e-07,
"loss": 0.5836,
"step": 14315
},
{
"epoch": 0.91,
"grad_norm": 0.8612959980964661,
"learning_rate": 2.251412051306717e-07,
"loss": 0.5623,
"step": 14316
},
{
"epoch": 0.91,
"grad_norm": 0.8819116950035095,
"learning_rate": 2.2483689670611542e-07,
"loss": 0.6256,
"step": 14317
},
{
"epoch": 0.91,
"grad_norm": 0.9524977207183838,
"learning_rate": 2.2453278934706446e-07,
"loss": 0.6195,
"step": 14318
},
{
"epoch": 0.91,
"grad_norm": 0.912262499332428,
"learning_rate": 2.2422888306632584e-07,
"loss": 0.5568,
"step": 14319
},
{
"epoch": 0.91,
"grad_norm": 0.8547895550727844,
"learning_rate": 2.2392517787669487e-07,
"loss": 0.5127,
"step": 14320
},
{
"epoch": 0.91,
"grad_norm": 0.9063989520072937,
"learning_rate": 2.2362167379096023e-07,
"loss": 0.5975,
"step": 14321
},
{
"epoch": 0.91,
"grad_norm": 0.8552689552307129,
"learning_rate": 2.2331837082190056e-07,
"loss": 0.5232,
"step": 14322
},
{
"epoch": 0.91,
"grad_norm": 0.8747928142547607,
"learning_rate": 2.2301526898228842e-07,
"loss": 0.5471,
"step": 14323
},
{
"epoch": 0.91,
"grad_norm": 0.879938006401062,
"learning_rate": 2.2271236828488474e-07,
"loss": 0.5942,
"step": 14324
},
{
"epoch": 0.91,
"grad_norm": 0.9146292209625244,
"learning_rate": 2.224096687424443e-07,
"loss": 0.57,
"step": 14325
},
{
"epoch": 0.91,
"grad_norm": 0.8963407874107361,
"learning_rate": 2.2210717036771246e-07,
"loss": 0.549,
"step": 14326
},
{
"epoch": 0.91,
"grad_norm": 0.9316021800041199,
"learning_rate": 2.218048731734268e-07,
"loss": 0.6144,
"step": 14327
},
{
"epoch": 0.91,
"grad_norm": 0.8399151563644409,
"learning_rate": 2.215027771723155e-07,
"loss": 0.5571,
"step": 14328
},
{
"epoch": 0.91,
"grad_norm": 0.8416658043861389,
"learning_rate": 2.2120088237709946e-07,
"loss": 0.5699,
"step": 14329
},
{
"epoch": 0.91,
"grad_norm": 0.9296332597732544,
"learning_rate": 2.2089918880049023e-07,
"loss": 0.5445,
"step": 14330
},
{
"epoch": 0.91,
"grad_norm": 0.8599060773849487,
"learning_rate": 2.205976964551909e-07,
"loss": 0.5574,
"step": 14331
},
{
"epoch": 0.91,
"grad_norm": 0.9314765334129333,
"learning_rate": 2.2029640535389586e-07,
"loss": 0.5455,
"step": 14332
},
{
"epoch": 0.91,
"grad_norm": 0.8735009431838989,
"learning_rate": 2.1999531550929098e-07,
"loss": 0.5763,
"step": 14333
},
{
"epoch": 0.91,
"grad_norm": 0.8349171280860901,
"learning_rate": 2.1969442693405673e-07,
"loss": 0.5455,
"step": 14334
},
{
"epoch": 0.91,
"grad_norm": 0.8516371846199036,
"learning_rate": 2.1939373964085964e-07,
"loss": 0.6168,
"step": 14335
},
{
"epoch": 0.91,
"grad_norm": 0.8744125366210938,
"learning_rate": 2.190932536423618e-07,
"loss": 0.5543,
"step": 14336
},
{
"epoch": 0.91,
"grad_norm": 0.9350723028182983,
"learning_rate": 2.1879296895121637e-07,
"loss": 0.6037,
"step": 14337
},
{
"epoch": 0.91,
"grad_norm": 0.8729871511459351,
"learning_rate": 2.1849288558006442e-07,
"loss": 0.5759,
"step": 14338
},
{
"epoch": 0.91,
"grad_norm": 0.9231459498405457,
"learning_rate": 2.1819300354154526e-07,
"loss": 0.6121,
"step": 14339
},
{
"epoch": 0.91,
"grad_norm": 0.941109299659729,
"learning_rate": 2.1789332284828323e-07,
"loss": 0.5852,
"step": 14340
},
{
"epoch": 0.91,
"grad_norm": 0.7934569716453552,
"learning_rate": 2.175938435128977e-07,
"loss": 0.501,
"step": 14341
},
{
"epoch": 0.91,
"grad_norm": 0.8977835774421692,
"learning_rate": 2.1729456554799855e-07,
"loss": 0.5864,
"step": 14342
},
{
"epoch": 0.91,
"grad_norm": 0.9556468725204468,
"learning_rate": 2.1699548896618795e-07,
"loss": 0.6184,
"step": 14343
},
{
"epoch": 0.91,
"grad_norm": 0.8871400952339172,
"learning_rate": 2.1669661378005802e-07,
"loss": 0.55,
"step": 14344
},
{
"epoch": 0.91,
"grad_norm": 0.8647100329399109,
"learning_rate": 2.1639794000219426e-07,
"loss": 0.5605,
"step": 14345
},
{
"epoch": 0.91,
"grad_norm": 0.9198769330978394,
"learning_rate": 2.1609946764517108e-07,
"loss": 0.5757,
"step": 14346
},
{
"epoch": 0.91,
"grad_norm": 0.9258266687393188,
"learning_rate": 2.1580119672155898e-07,
"loss": 0.6241,
"step": 14347
},
{
"epoch": 0.91,
"grad_norm": 0.8942254781723022,
"learning_rate": 2.1550312724391452e-07,
"loss": 0.5967,
"step": 14348
},
{
"epoch": 0.91,
"grad_norm": 0.8659271597862244,
"learning_rate": 2.152052592247894e-07,
"loss": 0.5591,
"step": 14349
},
{
"epoch": 0.91,
"grad_norm": 0.86356520652771,
"learning_rate": 2.1490759267672634e-07,
"loss": 0.5706,
"step": 14350
},
{
"epoch": 0.91,
"grad_norm": 0.9083942770957947,
"learning_rate": 2.1461012761225696e-07,
"loss": 0.5906,
"step": 14351
},
{
"epoch": 0.91,
"grad_norm": 0.8626084327697754,
"learning_rate": 2.143128640439085e-07,
"loss": 0.5377,
"step": 14352
},
{
"epoch": 0.91,
"grad_norm": 0.8646213412284851,
"learning_rate": 2.1401580198419812e-07,
"loss": 0.5453,
"step": 14353
},
{
"epoch": 0.91,
"grad_norm": 0.8685572147369385,
"learning_rate": 2.1371894144563254e-07,
"loss": 0.5698,
"step": 14354
},
{
"epoch": 0.91,
"grad_norm": 0.8844971060752869,
"learning_rate": 2.1342228244071173e-07,
"loss": 0.5964,
"step": 14355
},
{
"epoch": 0.91,
"grad_norm": 0.8692290186882019,
"learning_rate": 2.1312582498192792e-07,
"loss": 0.5788,
"step": 14356
},
{
"epoch": 0.91,
"grad_norm": 0.9128808975219727,
"learning_rate": 2.1282956908176277e-07,
"loss": 0.5831,
"step": 14357
},
{
"epoch": 0.91,
"grad_norm": 0.8476049304008484,
"learning_rate": 2.125335147526919e-07,
"loss": 0.5757,
"step": 14358
},
{
"epoch": 0.91,
"grad_norm": 0.9030495882034302,
"learning_rate": 2.122376620071792e-07,
"loss": 0.6114,
"step": 14359
},
{
"epoch": 0.91,
"grad_norm": 0.9279770851135254,
"learning_rate": 2.1194201085768363e-07,
"loss": 0.5573,
"step": 14360
},
{
"epoch": 0.91,
"grad_norm": 0.915275514125824,
"learning_rate": 2.1164656131665407e-07,
"loss": 0.5517,
"step": 14361
},
{
"epoch": 0.91,
"grad_norm": 0.8605398535728455,
"learning_rate": 2.1135131339652947e-07,
"loss": 0.5765,
"step": 14362
},
{
"epoch": 0.91,
"grad_norm": 0.8572301864624023,
"learning_rate": 2.1105626710974325e-07,
"loss": 0.5418,
"step": 14363
},
{
"epoch": 0.91,
"grad_norm": 1.0193010568618774,
"learning_rate": 2.1076142246871766e-07,
"loss": 0.523,
"step": 14364
},
{
"epoch": 0.91,
"grad_norm": 0.8625369668006897,
"learning_rate": 2.1046677948586836e-07,
"loss": 0.5679,
"step": 14365
},
{
"epoch": 0.91,
"grad_norm": 0.878371000289917,
"learning_rate": 2.1017233817360149e-07,
"loss": 0.5779,
"step": 14366
},
{
"epoch": 0.91,
"grad_norm": 0.9470487833023071,
"learning_rate": 2.098780985443144e-07,
"loss": 0.6326,
"step": 14367
},
{
"epoch": 0.91,
"grad_norm": 0.8446786999702454,
"learning_rate": 2.095840606103966e-07,
"loss": 0.5459,
"step": 14368
},
{
"epoch": 0.91,
"grad_norm": 0.862938642501831,
"learning_rate": 2.092902243842304e-07,
"loss": 0.5463,
"step": 14369
},
{
"epoch": 0.91,
"grad_norm": 0.9745242595672607,
"learning_rate": 2.0899658987818705e-07,
"loss": 0.5714,
"step": 14370
},
{
"epoch": 0.91,
"grad_norm": 0.9046638607978821,
"learning_rate": 2.0870315710462996e-07,
"loss": 0.5915,
"step": 14371
},
{
"epoch": 0.91,
"grad_norm": 0.9039925932884216,
"learning_rate": 2.0840992607591593e-07,
"loss": 0.5574,
"step": 14372
},
{
"epoch": 0.91,
"grad_norm": 0.9303500056266785,
"learning_rate": 2.081168968043906e-07,
"loss": 0.5953,
"step": 14373
},
{
"epoch": 0.91,
"grad_norm": 0.9302768707275391,
"learning_rate": 2.0782406930239363e-07,
"loss": 0.6172,
"step": 14374
},
{
"epoch": 0.91,
"grad_norm": 0.8675611019134521,
"learning_rate": 2.0753144358225397e-07,
"loss": 0.5759,
"step": 14375
},
{
"epoch": 0.91,
"grad_norm": 0.9458869695663452,
"learning_rate": 2.072390196562929e-07,
"loss": 0.5794,
"step": 14376
},
{
"epoch": 0.91,
"grad_norm": 0.934730052947998,
"learning_rate": 2.0694679753682445e-07,
"loss": 0.5584,
"step": 14377
},
{
"epoch": 0.91,
"grad_norm": 0.8245408535003662,
"learning_rate": 2.0665477723615268e-07,
"loss": 0.5508,
"step": 14378
},
{
"epoch": 0.91,
"grad_norm": 0.8302925229072571,
"learning_rate": 2.063629587665733e-07,
"loss": 0.5543,
"step": 14379
},
{
"epoch": 0.91,
"grad_norm": 0.8917962908744812,
"learning_rate": 2.0607134214037373e-07,
"loss": 0.5144,
"step": 14380
},
{
"epoch": 0.91,
"grad_norm": 0.830355703830719,
"learning_rate": 2.05779927369833e-07,
"loss": 0.5366,
"step": 14381
},
{
"epoch": 0.91,
"grad_norm": 0.8706912994384766,
"learning_rate": 2.054887144672224e-07,
"loss": 0.5151,
"step": 14382
},
{
"epoch": 0.91,
"grad_norm": 0.8185093402862549,
"learning_rate": 2.0519770344480272e-07,
"loss": 0.5727,
"step": 14383
},
{
"epoch": 0.91,
"grad_norm": 0.876105785369873,
"learning_rate": 2.0490689431482746e-07,
"loss": 0.541,
"step": 14384
},
{
"epoch": 0.91,
"grad_norm": 0.899113118648529,
"learning_rate": 2.0461628708954183e-07,
"loss": 0.6337,
"step": 14385
},
{
"epoch": 0.91,
"grad_norm": 0.8771045207977295,
"learning_rate": 2.0432588178118274e-07,
"loss": 0.5753,
"step": 14386
},
{
"epoch": 0.91,
"grad_norm": 0.8254141211509705,
"learning_rate": 2.0403567840197813e-07,
"loss": 0.5234,
"step": 14387
},
{
"epoch": 0.91,
"grad_norm": 0.9506052732467651,
"learning_rate": 2.0374567696414716e-07,
"loss": 0.5637,
"step": 14388
},
{
"epoch": 0.91,
"grad_norm": 0.9225839972496033,
"learning_rate": 2.0345587747990004e-07,
"loss": 0.5918,
"step": 14389
},
{
"epoch": 0.91,
"grad_norm": 0.9083296656608582,
"learning_rate": 2.0316627996144035e-07,
"loss": 0.5388,
"step": 14390
},
{
"epoch": 0.91,
"grad_norm": 0.9312944412231445,
"learning_rate": 2.028768844209622e-07,
"loss": 0.5816,
"step": 14391
},
{
"epoch": 0.91,
"grad_norm": 0.9078527092933655,
"learning_rate": 2.0258769087065034e-07,
"loss": 0.62,
"step": 14392
},
{
"epoch": 0.91,
"grad_norm": 0.8647677898406982,
"learning_rate": 2.022986993226811e-07,
"loss": 0.5776,
"step": 14393
},
{
"epoch": 0.91,
"grad_norm": 0.8233307600021362,
"learning_rate": 2.020099097892242e-07,
"loss": 0.5451,
"step": 14394
},
{
"epoch": 0.91,
"grad_norm": 0.8663762211799622,
"learning_rate": 2.0172132228243878e-07,
"loss": 0.6064,
"step": 14395
},
{
"epoch": 0.91,
"grad_norm": 0.8814685940742493,
"learning_rate": 2.014329368144774e-07,
"loss": 0.5361,
"step": 14396
},
{
"epoch": 0.91,
"grad_norm": 0.9814503788948059,
"learning_rate": 2.0114475339748085e-07,
"loss": 0.6066,
"step": 14397
},
{
"epoch": 0.91,
"grad_norm": 0.8616315126419067,
"learning_rate": 2.0085677204358445e-07,
"loss": 0.5469,
"step": 14398
},
{
"epoch": 0.91,
"grad_norm": 0.8662075400352478,
"learning_rate": 2.005689927649157e-07,
"loss": 0.601,
"step": 14399
},
{
"epoch": 0.91,
"grad_norm": 0.8765701055526733,
"learning_rate": 2.0028141557358992e-07,
"loss": 0.5845,
"step": 14400
},
{
"epoch": 0.91,
"grad_norm": 0.9240060448646545,
"learning_rate": 1.999940404817169e-07,
"loss": 0.5803,
"step": 14401
},
{
"epoch": 0.91,
"grad_norm": 0.8918695449829102,
"learning_rate": 1.9970686750139633e-07,
"loss": 0.5592,
"step": 14402
},
{
"epoch": 0.91,
"grad_norm": 0.8836629986763,
"learning_rate": 1.994198966447214e-07,
"loss": 0.6042,
"step": 14403
},
{
"epoch": 0.91,
"grad_norm": 0.8865856528282166,
"learning_rate": 1.991331279237746e-07,
"loss": 0.6085,
"step": 14404
},
{
"epoch": 0.91,
"grad_norm": 0.8742964863777161,
"learning_rate": 1.988465613506302e-07,
"loss": 0.5309,
"step": 14405
},
{
"epoch": 0.91,
"grad_norm": 0.9155290126800537,
"learning_rate": 1.9856019693735463e-07,
"loss": 0.6035,
"step": 14406
},
{
"epoch": 0.91,
"grad_norm": 0.8845750093460083,
"learning_rate": 1.982740346960077e-07,
"loss": 0.5975,
"step": 14407
},
{
"epoch": 0.91,
"grad_norm": 1.014074683189392,
"learning_rate": 1.9798807463863589e-07,
"loss": 0.615,
"step": 14408
},
{
"epoch": 0.91,
"grad_norm": 0.8332728147506714,
"learning_rate": 1.977023167772818e-07,
"loss": 0.551,
"step": 14409
},
{
"epoch": 0.91,
"grad_norm": 0.8898774981498718,
"learning_rate": 1.9741676112397688e-07,
"loss": 0.5352,
"step": 14410
},
{
"epoch": 0.91,
"grad_norm": 0.8795101046562195,
"learning_rate": 1.9713140769074546e-07,
"loss": 0.5739,
"step": 14411
},
{
"epoch": 0.91,
"grad_norm": 0.9184449315071106,
"learning_rate": 1.9684625648960287e-07,
"loss": 0.5652,
"step": 14412
},
{
"epoch": 0.91,
"grad_norm": 0.837814450263977,
"learning_rate": 1.965613075325551e-07,
"loss": 0.5506,
"step": 14413
},
{
"epoch": 0.91,
"grad_norm": 0.9577629566192627,
"learning_rate": 1.9627656083160085e-07,
"loss": 0.6132,
"step": 14414
},
{
"epoch": 0.91,
"grad_norm": 0.8790633678436279,
"learning_rate": 1.9599201639872943e-07,
"loss": 0.5509,
"step": 14415
},
{
"epoch": 0.91,
"grad_norm": 0.8492127060890198,
"learning_rate": 1.9570767424592186e-07,
"loss": 0.5252,
"step": 14416
},
{
"epoch": 0.91,
"grad_norm": 0.9555572271347046,
"learning_rate": 1.9542353438515183e-07,
"loss": 0.5623,
"step": 14417
},
{
"epoch": 0.91,
"grad_norm": 0.8835919499397278,
"learning_rate": 1.9513959682838314e-07,
"loss": 0.5849,
"step": 14418
},
{
"epoch": 0.91,
"grad_norm": 0.882376492023468,
"learning_rate": 1.9485586158757009e-07,
"loss": 0.5733,
"step": 14419
},
{
"epoch": 0.91,
"grad_norm": 0.944187343120575,
"learning_rate": 1.9457232867466204e-07,
"loss": 0.6102,
"step": 14420
},
{
"epoch": 0.91,
"grad_norm": 0.9396758079528809,
"learning_rate": 1.9428899810159606e-07,
"loss": 0.5872,
"step": 14421
},
{
"epoch": 0.91,
"grad_norm": 0.98106449842453,
"learning_rate": 1.9400586988030212e-07,
"loss": 0.5745,
"step": 14422
},
{
"epoch": 0.91,
"grad_norm": 0.8788214921951294,
"learning_rate": 1.937229440227023e-07,
"loss": 0.5746,
"step": 14423
},
{
"epoch": 0.91,
"grad_norm": 0.8514944911003113,
"learning_rate": 1.9344022054070933e-07,
"loss": 0.5369,
"step": 14424
},
{
"epoch": 0.91,
"grad_norm": 0.8359341025352478,
"learning_rate": 1.9315769944622808e-07,
"loss": 0.5719,
"step": 14425
},
{
"epoch": 0.91,
"grad_norm": 0.8570429682731628,
"learning_rate": 1.9287538075115463e-07,
"loss": 0.6033,
"step": 14426
},
{
"epoch": 0.91,
"grad_norm": 0.8559786081314087,
"learning_rate": 1.9259326446737503e-07,
"loss": 0.5631,
"step": 14427
},
{
"epoch": 0.91,
"grad_norm": 0.8701675534248352,
"learning_rate": 1.9231135060677087e-07,
"loss": 0.581,
"step": 14428
},
{
"epoch": 0.91,
"grad_norm": 0.9724037051200867,
"learning_rate": 1.9202963918120988e-07,
"loss": 0.6107,
"step": 14429
},
{
"epoch": 0.91,
"grad_norm": 0.8299076557159424,
"learning_rate": 1.9174813020255533e-07,
"loss": 0.5938,
"step": 14430
},
{
"epoch": 0.91,
"grad_norm": 0.84014493227005,
"learning_rate": 1.9146682368266112e-07,
"loss": 0.5601,
"step": 14431
},
{
"epoch": 0.91,
"grad_norm": 0.9042718410491943,
"learning_rate": 1.9118571963336996e-07,
"loss": 0.5843,
"step": 14432
},
{
"epoch": 0.91,
"grad_norm": 0.8914376497268677,
"learning_rate": 1.9090481806652017e-07,
"loss": 0.5966,
"step": 14433
},
{
"epoch": 0.91,
"grad_norm": 0.832227885723114,
"learning_rate": 1.9062411899393896e-07,
"loss": 0.5538,
"step": 14434
},
{
"epoch": 0.91,
"grad_norm": 0.8363732695579529,
"learning_rate": 1.9034362242744576e-07,
"loss": 0.5605,
"step": 14435
},
{
"epoch": 0.91,
"grad_norm": 0.9074519872665405,
"learning_rate": 1.9006332837885054e-07,
"loss": 0.5895,
"step": 14436
},
{
"epoch": 0.91,
"grad_norm": 0.9413220882415771,
"learning_rate": 1.8978323685995558e-07,
"loss": 0.6647,
"step": 14437
},
{
"epoch": 0.91,
"grad_norm": 0.9103296995162964,
"learning_rate": 1.8950334788255586e-07,
"loss": 0.5697,
"step": 14438
},
{
"epoch": 0.91,
"grad_norm": 0.8581710457801819,
"learning_rate": 1.8922366145843585e-07,
"loss": 0.5329,
"step": 14439
},
{
"epoch": 0.91,
"grad_norm": 0.8271042704582214,
"learning_rate": 1.8894417759937055e-07,
"loss": 0.5779,
"step": 14440
},
{
"epoch": 0.91,
"grad_norm": 0.8599358797073364,
"learning_rate": 1.886648963171306e-07,
"loss": 0.5483,
"step": 14441
},
{
"epoch": 0.91,
"grad_norm": 0.8909770250320435,
"learning_rate": 1.8838581762347485e-07,
"loss": 0.5755,
"step": 14442
},
{
"epoch": 0.92,
"grad_norm": 0.9004727005958557,
"learning_rate": 1.881069415301534e-07,
"loss": 0.5401,
"step": 14443
},
{
"epoch": 0.92,
"grad_norm": 0.926632821559906,
"learning_rate": 1.8782826804890908e-07,
"loss": 0.6182,
"step": 14444
},
{
"epoch": 0.92,
"grad_norm": 0.9216212630271912,
"learning_rate": 1.875497971914758e-07,
"loss": 0.6631,
"step": 14445
},
{
"epoch": 0.92,
"grad_norm": 0.9242258071899414,
"learning_rate": 1.872715289695798e-07,
"loss": 0.5645,
"step": 14446
},
{
"epoch": 0.92,
"grad_norm": 0.8471218943595886,
"learning_rate": 1.8699346339493774e-07,
"loss": 0.5925,
"step": 14447
},
{
"epoch": 0.92,
"grad_norm": 0.8889009952545166,
"learning_rate": 1.867156004792575e-07,
"loss": 0.5835,
"step": 14448
},
{
"epoch": 0.92,
"grad_norm": 0.9420746564865112,
"learning_rate": 1.864379402342381e-07,
"loss": 0.5516,
"step": 14449
},
{
"epoch": 0.92,
"grad_norm": 0.8928573131561279,
"learning_rate": 1.8616048267157348e-07,
"loss": 0.5829,
"step": 14450
},
{
"epoch": 0.92,
"grad_norm": 0.9594048261642456,
"learning_rate": 1.8588322780294377e-07,
"loss": 0.5724,
"step": 14451
},
{
"epoch": 0.92,
"grad_norm": 0.8984374403953552,
"learning_rate": 1.8560617564002458e-07,
"loss": 0.5723,
"step": 14452
},
{
"epoch": 0.92,
"grad_norm": 0.9626867175102234,
"learning_rate": 1.8532932619448106e-07,
"loss": 0.5672,
"step": 14453
},
{
"epoch": 0.92,
"grad_norm": 0.8767659068107605,
"learning_rate": 1.8505267947797056e-07,
"loss": 0.5572,
"step": 14454
},
{
"epoch": 0.92,
"grad_norm": 0.8869348168373108,
"learning_rate": 1.847762355021421e-07,
"loss": 0.5568,
"step": 14455
},
{
"epoch": 0.92,
"grad_norm": 0.9030115604400635,
"learning_rate": 1.8449999427863575e-07,
"loss": 0.5726,
"step": 14456
},
{
"epoch": 0.92,
"grad_norm": 0.8998832106590271,
"learning_rate": 1.842239558190817e-07,
"loss": 0.5776,
"step": 14457
},
{
"epoch": 0.92,
"grad_norm": 0.9000546932220459,
"learning_rate": 1.839481201351051e-07,
"loss": 0.5754,
"step": 14458
},
{
"epoch": 0.92,
"grad_norm": 0.9226672053337097,
"learning_rate": 1.8367248723831889e-07,
"loss": 0.6349,
"step": 14459
},
{
"epoch": 0.92,
"grad_norm": 0.8481553792953491,
"learning_rate": 1.833970571403293e-07,
"loss": 0.5471,
"step": 14460
},
{
"epoch": 0.92,
"grad_norm": 0.8419100046157837,
"learning_rate": 1.831218298527343e-07,
"loss": 0.5638,
"step": 14461
},
{
"epoch": 0.92,
"grad_norm": 0.9119687080383301,
"learning_rate": 1.828468053871213e-07,
"loss": 0.6084,
"step": 14462
},
{
"epoch": 0.92,
"grad_norm": 0.8742016553878784,
"learning_rate": 1.825719837550727e-07,
"loss": 0.5708,
"step": 14463
},
{
"epoch": 0.92,
"grad_norm": 0.8338335752487183,
"learning_rate": 1.822973649681592e-07,
"loss": 0.5689,
"step": 14464
},
{
"epoch": 0.92,
"grad_norm": 0.8597403168678284,
"learning_rate": 1.820229490379438e-07,
"loss": 0.4852,
"step": 14465
},
{
"epoch": 0.92,
"grad_norm": 0.8943460583686829,
"learning_rate": 1.8174873597598176e-07,
"loss": 0.5449,
"step": 14466
},
{
"epoch": 0.92,
"grad_norm": 0.9471714496612549,
"learning_rate": 1.814747257938182e-07,
"loss": 0.6154,
"step": 14467
},
{
"epoch": 0.92,
"grad_norm": 0.8958661556243896,
"learning_rate": 1.8120091850299225e-07,
"loss": 0.5881,
"step": 14468
},
{
"epoch": 0.92,
"grad_norm": 0.8442410826683044,
"learning_rate": 1.809273141150325e-07,
"loss": 0.5285,
"step": 14469
},
{
"epoch": 0.92,
"grad_norm": 0.8980113863945007,
"learning_rate": 1.8065391264145805e-07,
"loss": 0.5612,
"step": 14470
},
{
"epoch": 0.92,
"grad_norm": 0.8895835876464844,
"learning_rate": 1.8038071409378299e-07,
"loss": 0.6386,
"step": 14471
},
{
"epoch": 0.92,
"grad_norm": 0.872721791267395,
"learning_rate": 1.8010771848350983e-07,
"loss": 0.6137,
"step": 14472
},
{
"epoch": 0.92,
"grad_norm": 0.8433144688606262,
"learning_rate": 1.7983492582213324e-07,
"loss": 0.5736,
"step": 14473
},
{
"epoch": 0.92,
"grad_norm": 0.9304208159446716,
"learning_rate": 1.7956233612114017e-07,
"loss": 0.5614,
"step": 14474
},
{
"epoch": 0.92,
"grad_norm": 0.8683229088783264,
"learning_rate": 1.792899493920075e-07,
"loss": 0.5202,
"step": 14475
},
{
"epoch": 0.92,
"grad_norm": 0.9167693257331848,
"learning_rate": 1.79017765646205e-07,
"loss": 0.6225,
"step": 14476
},
{
"epoch": 0.92,
"grad_norm": 0.8281375169754028,
"learning_rate": 1.78745784895194e-07,
"loss": 0.5476,
"step": 14477
},
{
"epoch": 0.92,
"grad_norm": 0.8747629523277283,
"learning_rate": 1.7847400715042594e-07,
"loss": 0.5737,
"step": 14478
},
{
"epoch": 0.92,
"grad_norm": 0.916631817817688,
"learning_rate": 1.7820243242334334e-07,
"loss": 0.6067,
"step": 14479
},
{
"epoch": 0.92,
"grad_norm": 0.9027150869369507,
"learning_rate": 1.7793106072538423e-07,
"loss": 0.5696,
"step": 14480
},
{
"epoch": 0.92,
"grad_norm": 0.917669415473938,
"learning_rate": 1.7765989206797285e-07,
"loss": 0.6068,
"step": 14481
},
{
"epoch": 0.92,
"grad_norm": 1.0007344484329224,
"learning_rate": 1.7738892646252726e-07,
"loss": 0.5725,
"step": 14482
},
{
"epoch": 0.92,
"grad_norm": 0.9161766767501831,
"learning_rate": 1.7711816392045778e-07,
"loss": 0.5761,
"step": 14483
},
{
"epoch": 0.92,
"grad_norm": 0.892318606376648,
"learning_rate": 1.7684760445316418e-07,
"loss": 0.5412,
"step": 14484
},
{
"epoch": 0.92,
"grad_norm": 0.9114658236503601,
"learning_rate": 1.765772480720407e-07,
"loss": 0.5503,
"step": 14485
},
{
"epoch": 0.92,
"grad_norm": 0.9310790300369263,
"learning_rate": 1.763070947884693e-07,
"loss": 0.5859,
"step": 14486
},
{
"epoch": 0.92,
"grad_norm": 0.898358941078186,
"learning_rate": 1.7603714461382481e-07,
"loss": 0.6073,
"step": 14487
},
{
"epoch": 0.92,
"grad_norm": 0.846410870552063,
"learning_rate": 1.7576739755947593e-07,
"loss": 0.5229,
"step": 14488
},
{
"epoch": 0.92,
"grad_norm": 0.8562573194503784,
"learning_rate": 1.7549785363677906e-07,
"loss": 0.5742,
"step": 14489
},
{
"epoch": 0.92,
"grad_norm": 0.8590916395187378,
"learning_rate": 1.7522851285708465e-07,
"loss": 0.5965,
"step": 14490
},
{
"epoch": 0.92,
"grad_norm": 0.8809557557106018,
"learning_rate": 1.7495937523173356e-07,
"loss": 0.5514,
"step": 14491
},
{
"epoch": 0.92,
"grad_norm": 0.9356900453567505,
"learning_rate": 1.7469044077205732e-07,
"loss": 0.5705,
"step": 14492
},
{
"epoch": 0.92,
"grad_norm": 0.8484225869178772,
"learning_rate": 1.744217094893813e-07,
"loss": 0.5891,
"step": 14493
},
{
"epoch": 0.92,
"grad_norm": 0.8933218121528625,
"learning_rate": 1.7415318139502036e-07,
"loss": 0.55,
"step": 14494
},
{
"epoch": 0.92,
"grad_norm": 0.9217604398727417,
"learning_rate": 1.7388485650028043e-07,
"loss": 0.5794,
"step": 14495
},
{
"epoch": 0.92,
"grad_norm": 0.9160068035125732,
"learning_rate": 1.7361673481646025e-07,
"loss": 0.5694,
"step": 14496
},
{
"epoch": 0.92,
"grad_norm": 1.0053678750991821,
"learning_rate": 1.7334881635485023e-07,
"loss": 0.5796,
"step": 14497
},
{
"epoch": 0.92,
"grad_norm": 0.934262216091156,
"learning_rate": 1.7308110112673027e-07,
"loss": 0.5943,
"step": 14498
},
{
"epoch": 0.92,
"grad_norm": 0.8574293255805969,
"learning_rate": 1.7281358914337408e-07,
"loss": 0.5807,
"step": 14499
},
{
"epoch": 0.92,
"grad_norm": 0.9166094064712524,
"learning_rate": 1.7254628041604437e-07,
"loss": 0.5807,
"step": 14500
},
{
"epoch": 0.92,
"grad_norm": 0.8386301398277283,
"learning_rate": 1.7227917495599823e-07,
"loss": 0.5858,
"step": 14501
},
{
"epoch": 0.92,
"grad_norm": 1.001879096031189,
"learning_rate": 1.7201227277448108e-07,
"loss": 0.6831,
"step": 14502
},
{
"epoch": 0.92,
"grad_norm": 0.887852668762207,
"learning_rate": 1.7174557388273173e-07,
"loss": 0.5888,
"step": 14503
},
{
"epoch": 0.92,
"grad_norm": 0.9143977165222168,
"learning_rate": 1.7147907829198008e-07,
"loss": 0.5656,
"step": 14504
},
{
"epoch": 0.92,
"grad_norm": 0.9383800029754639,
"learning_rate": 1.7121278601344715e-07,
"loss": 0.6177,
"step": 14505
},
{
"epoch": 0.92,
"grad_norm": 0.918759822845459,
"learning_rate": 1.7094669705834566e-07,
"loss": 0.58,
"step": 14506
},
{
"epoch": 0.92,
"grad_norm": 0.8827510476112366,
"learning_rate": 1.706808114378805e-07,
"loss": 0.5384,
"step": 14507
},
{
"epoch": 0.92,
"grad_norm": 0.8679559230804443,
"learning_rate": 1.7041512916324554e-07,
"loss": 0.4867,
"step": 14508
},
{
"epoch": 0.92,
"grad_norm": 0.9122892618179321,
"learning_rate": 1.7014965024562846e-07,
"loss": 0.5651,
"step": 14509
},
{
"epoch": 0.92,
"grad_norm": 0.8981230854988098,
"learning_rate": 1.698843746962081e-07,
"loss": 0.5893,
"step": 14510
},
{
"epoch": 0.92,
"grad_norm": 0.9092260599136353,
"learning_rate": 1.6961930252615388e-07,
"loss": 0.5357,
"step": 14511
},
{
"epoch": 0.92,
"grad_norm": 0.8531707525253296,
"learning_rate": 1.6935443374662741e-07,
"loss": 0.5466,
"step": 14512
},
{
"epoch": 0.92,
"grad_norm": 0.890770673751831,
"learning_rate": 1.6908976836878088e-07,
"loss": 0.5673,
"step": 14513
},
{
"epoch": 0.92,
"grad_norm": 0.9549497961997986,
"learning_rate": 1.6882530640375872e-07,
"loss": 0.5602,
"step": 14514
},
{
"epoch": 0.92,
"grad_norm": 0.89844810962677,
"learning_rate": 1.68561047862697e-07,
"loss": 0.5751,
"step": 14515
},
{
"epoch": 0.92,
"grad_norm": 0.9014208912849426,
"learning_rate": 1.6829699275672186e-07,
"loss": 0.5715,
"step": 14516
},
{
"epoch": 0.92,
"grad_norm": 0.9853465557098389,
"learning_rate": 1.6803314109695157e-07,
"loss": 0.5917,
"step": 14517
},
{
"epoch": 0.92,
"grad_norm": 0.8697071075439453,
"learning_rate": 1.677694928944973e-07,
"loss": 0.5795,
"step": 14518
},
{
"epoch": 0.92,
"grad_norm": 0.9067200422286987,
"learning_rate": 1.6750604816045902e-07,
"loss": 0.5587,
"step": 14519
},
{
"epoch": 0.92,
"grad_norm": 0.899541437625885,
"learning_rate": 1.6724280690593008e-07,
"loss": 0.5799,
"step": 14520
},
{
"epoch": 0.92,
"grad_norm": 0.8571711778640747,
"learning_rate": 1.6697976914199497e-07,
"loss": 0.5425,
"step": 14521
},
{
"epoch": 0.92,
"grad_norm": 0.8938726782798767,
"learning_rate": 1.6671693487972818e-07,
"loss": 0.5316,
"step": 14522
},
{
"epoch": 0.92,
"grad_norm": 0.9331005215644836,
"learning_rate": 1.6645430413019858e-07,
"loss": 0.6139,
"step": 14523
},
{
"epoch": 0.92,
"grad_norm": 0.89864182472229,
"learning_rate": 1.6619187690446293e-07,
"loss": 0.5949,
"step": 14524
},
{
"epoch": 0.92,
"grad_norm": 0.8560614585876465,
"learning_rate": 1.659296532135718e-07,
"loss": 0.5573,
"step": 14525
},
{
"epoch": 0.92,
"grad_norm": 0.9034044742584229,
"learning_rate": 1.6566763306856638e-07,
"loss": 0.5997,
"step": 14526
},
{
"epoch": 0.92,
"grad_norm": 0.8897153735160828,
"learning_rate": 1.6540581648048003e-07,
"loss": 0.5859,
"step": 14527
},
{
"epoch": 0.92,
"grad_norm": 0.8635241985321045,
"learning_rate": 1.6514420346033565e-07,
"loss": 0.5781,
"step": 14528
},
{
"epoch": 0.92,
"grad_norm": 0.9002516865730286,
"learning_rate": 1.6488279401915052e-07,
"loss": 0.6092,
"step": 14529
},
{
"epoch": 0.92,
"grad_norm": 0.8698979020118713,
"learning_rate": 1.6462158816792973e-07,
"loss": 0.5981,
"step": 14530
},
{
"epoch": 0.92,
"grad_norm": 0.9019778966903687,
"learning_rate": 1.643605859176739e-07,
"loss": 0.5418,
"step": 14531
},
{
"epoch": 0.92,
"grad_norm": 0.8013015389442444,
"learning_rate": 1.6409978727937094e-07,
"loss": 0.5363,
"step": 14532
},
{
"epoch": 0.92,
"grad_norm": 0.9132005572319031,
"learning_rate": 1.6383919226400368e-07,
"loss": 0.5711,
"step": 14533
},
{
"epoch": 0.92,
"grad_norm": 0.8765537738800049,
"learning_rate": 1.6357880088254396e-07,
"loss": 0.5648,
"step": 14534
},
{
"epoch": 0.92,
"grad_norm": 0.8809942007064819,
"learning_rate": 1.633186131459563e-07,
"loss": 0.529,
"step": 14535
},
{
"epoch": 0.92,
"grad_norm": 0.9183542728424072,
"learning_rate": 1.6305862906519587e-07,
"loss": 0.5666,
"step": 14536
},
{
"epoch": 0.92,
"grad_norm": 0.9470837712287903,
"learning_rate": 1.6279884865121108e-07,
"loss": 0.5908,
"step": 14537
},
{
"epoch": 0.92,
"grad_norm": 0.9300570487976074,
"learning_rate": 1.6253927191493879e-07,
"loss": 0.6089,
"step": 14538
},
{
"epoch": 0.92,
"grad_norm": 0.8753145337104797,
"learning_rate": 1.622798988673091e-07,
"loss": 0.5747,
"step": 14539
},
{
"epoch": 0.92,
"grad_norm": 0.8983120918273926,
"learning_rate": 1.6202072951924386e-07,
"loss": 0.5561,
"step": 14540
},
{
"epoch": 0.92,
"grad_norm": 0.8710740208625793,
"learning_rate": 1.6176176388165598e-07,
"loss": 0.5833,
"step": 14541
},
{
"epoch": 0.92,
"grad_norm": 0.8573417067527771,
"learning_rate": 1.6150300196544955e-07,
"loss": 0.5251,
"step": 14542
},
{
"epoch": 0.92,
"grad_norm": 0.8613002300262451,
"learning_rate": 1.612444437815186e-07,
"loss": 0.5635,
"step": 14543
},
{
"epoch": 0.92,
"grad_norm": 0.7859262824058533,
"learning_rate": 1.6098608934075166e-07,
"loss": 0.5038,
"step": 14544
},
{
"epoch": 0.92,
"grad_norm": 0.9234058260917664,
"learning_rate": 1.607279386540278e-07,
"loss": 0.6108,
"step": 14545
},
{
"epoch": 0.92,
"grad_norm": 0.9104276895523071,
"learning_rate": 1.60469991732215e-07,
"loss": 0.5749,
"step": 14546
},
{
"epoch": 0.92,
"grad_norm": 0.9492734670639038,
"learning_rate": 1.6021224858617513e-07,
"loss": 0.5761,
"step": 14547
},
{
"epoch": 0.92,
"grad_norm": 0.8354452848434448,
"learning_rate": 1.5995470922676116e-07,
"loss": 0.636,
"step": 14548
},
{
"epoch": 0.92,
"grad_norm": 0.8883960247039795,
"learning_rate": 1.5969737366481774e-07,
"loss": 0.5439,
"step": 14549
},
{
"epoch": 0.92,
"grad_norm": 0.9534339308738708,
"learning_rate": 1.5944024191117958e-07,
"loss": 0.6199,
"step": 14550
},
{
"epoch": 0.92,
"grad_norm": 0.9829249382019043,
"learning_rate": 1.5918331397667298e-07,
"loss": 0.5705,
"step": 14551
},
{
"epoch": 0.92,
"grad_norm": 0.8952674865722656,
"learning_rate": 1.589265898721176e-07,
"loss": 0.5662,
"step": 14552
},
{
"epoch": 0.92,
"grad_norm": 0.8737239241600037,
"learning_rate": 1.586700696083232e-07,
"loss": 0.5791,
"step": 14553
},
{
"epoch": 0.92,
"grad_norm": 0.9135823249816895,
"learning_rate": 1.5841375319608943e-07,
"loss": 0.5703,
"step": 14554
},
{
"epoch": 0.92,
"grad_norm": 0.8945186734199524,
"learning_rate": 1.5815764064621043e-07,
"loss": 0.5432,
"step": 14555
},
{
"epoch": 0.92,
"grad_norm": 0.9179873466491699,
"learning_rate": 1.5790173196946924e-07,
"loss": 0.5429,
"step": 14556
},
{
"epoch": 0.92,
"grad_norm": 0.9364351630210876,
"learning_rate": 1.5764602717664224e-07,
"loss": 0.5607,
"step": 14557
},
{
"epoch": 0.92,
"grad_norm": 0.8686890602111816,
"learning_rate": 1.5739052627849581e-07,
"loss": 0.5407,
"step": 14558
},
{
"epoch": 0.92,
"grad_norm": 0.9229563474655151,
"learning_rate": 1.571352292857875e-07,
"loss": 0.523,
"step": 14559
},
{
"epoch": 0.92,
"grad_norm": 0.9978078603744507,
"learning_rate": 1.5688013620926757e-07,
"loss": 0.5824,
"step": 14560
},
{
"epoch": 0.92,
"grad_norm": 0.8744204640388489,
"learning_rate": 1.566252470596774e-07,
"loss": 0.5485,
"step": 14561
},
{
"epoch": 0.92,
"grad_norm": 0.8485409617424011,
"learning_rate": 1.5637056184774958e-07,
"loss": 0.588,
"step": 14562
},
{
"epoch": 0.92,
"grad_norm": 0.8641082048416138,
"learning_rate": 1.5611608058420714e-07,
"loss": 0.5467,
"step": 14563
},
{
"epoch": 0.92,
"grad_norm": 0.9121243357658386,
"learning_rate": 1.5586180327976598e-07,
"loss": 0.5439,
"step": 14564
},
{
"epoch": 0.92,
"grad_norm": 0.9285200834274292,
"learning_rate": 1.5560772994513251e-07,
"loss": 0.5821,
"step": 14565
},
{
"epoch": 0.92,
"grad_norm": 0.868303120136261,
"learning_rate": 1.55353860591006e-07,
"loss": 0.5052,
"step": 14566
},
{
"epoch": 0.92,
"grad_norm": 0.8897990584373474,
"learning_rate": 1.5510019522807397e-07,
"loss": 0.545,
"step": 14567
},
{
"epoch": 0.92,
"grad_norm": 0.8986243009567261,
"learning_rate": 1.5484673386701953e-07,
"loss": 0.6177,
"step": 14568
},
{
"epoch": 0.92,
"grad_norm": 0.8620361685752869,
"learning_rate": 1.545934765185131e-07,
"loss": 0.5743,
"step": 14569
},
{
"epoch": 0.92,
"grad_norm": 0.8731402158737183,
"learning_rate": 1.5434042319321996e-07,
"loss": 0.5466,
"step": 14570
},
{
"epoch": 0.92,
"grad_norm": 0.8847206830978394,
"learning_rate": 1.5408757390179496e-07,
"loss": 0.548,
"step": 14571
},
{
"epoch": 0.92,
"grad_norm": 0.8469605445861816,
"learning_rate": 1.5383492865488459e-07,
"loss": 0.5754,
"step": 14572
},
{
"epoch": 0.92,
"grad_norm": 0.8870298266410828,
"learning_rate": 1.5358248746312588e-07,
"loss": 0.5896,
"step": 14573
},
{
"epoch": 0.92,
"grad_norm": 0.9190220236778259,
"learning_rate": 1.533302503371503e-07,
"loss": 0.5394,
"step": 14574
},
{
"epoch": 0.92,
"grad_norm": 0.9334941506385803,
"learning_rate": 1.5307821728757722e-07,
"loss": 0.5883,
"step": 14575
},
{
"epoch": 0.92,
"grad_norm": 0.9348692297935486,
"learning_rate": 1.5282638832501917e-07,
"loss": 0.6218,
"step": 14576
},
{
"epoch": 0.92,
"grad_norm": 0.8661801218986511,
"learning_rate": 1.5257476346007938e-07,
"loss": 0.5299,
"step": 14577
},
{
"epoch": 0.92,
"grad_norm": 0.9223611354827881,
"learning_rate": 1.523233427033538e-07,
"loss": 0.5646,
"step": 14578
},
{
"epoch": 0.92,
"grad_norm": 0.8899549245834351,
"learning_rate": 1.5207212606542786e-07,
"loss": 0.6004,
"step": 14579
},
{
"epoch": 0.92,
"grad_norm": 0.8400233387947083,
"learning_rate": 1.518211135568809e-07,
"loss": 0.5215,
"step": 14580
},
{
"epoch": 0.92,
"grad_norm": 0.8840736746788025,
"learning_rate": 1.5157030518828054e-07,
"loss": 0.6081,
"step": 14581
},
{
"epoch": 0.92,
"grad_norm": 0.8895472884178162,
"learning_rate": 1.513197009701889e-07,
"loss": 0.5254,
"step": 14582
},
{
"epoch": 0.92,
"grad_norm": 0.8849679827690125,
"learning_rate": 1.510693009131564e-07,
"loss": 0.5798,
"step": 14583
},
{
"epoch": 0.92,
"grad_norm": 0.8696837425231934,
"learning_rate": 1.50819105027728e-07,
"loss": 0.4908,
"step": 14584
},
{
"epoch": 0.92,
"grad_norm": 0.8720855116844177,
"learning_rate": 1.5056911332443801e-07,
"loss": 0.6337,
"step": 14585
},
{
"epoch": 0.92,
"grad_norm": 0.9008244276046753,
"learning_rate": 1.5031932581381247e-07,
"loss": 0.6017,
"step": 14586
},
{
"epoch": 0.92,
"grad_norm": 0.8799957036972046,
"learning_rate": 1.5006974250636906e-07,
"loss": 0.5997,
"step": 14587
},
{
"epoch": 0.92,
"grad_norm": 0.8658244013786316,
"learning_rate": 1.498203634126183e-07,
"loss": 0.5255,
"step": 14588
},
{
"epoch": 0.92,
"grad_norm": 0.876167893409729,
"learning_rate": 1.4957118854305842e-07,
"loss": 0.5822,
"step": 14589
},
{
"epoch": 0.92,
"grad_norm": 0.926031768321991,
"learning_rate": 1.4932221790818268e-07,
"loss": 0.5881,
"step": 14590
},
{
"epoch": 0.92,
"grad_norm": 1.0563932657241821,
"learning_rate": 1.4907345151847387e-07,
"loss": 0.5859,
"step": 14591
},
{
"epoch": 0.92,
"grad_norm": 0.832943320274353,
"learning_rate": 1.4882488938440688e-07,
"loss": 0.5449,
"step": 14592
},
{
"epoch": 0.92,
"grad_norm": 0.97652667760849,
"learning_rate": 1.485765315164478e-07,
"loss": 0.5737,
"step": 14593
},
{
"epoch": 0.92,
"grad_norm": 0.8609157800674438,
"learning_rate": 1.483283779250544e-07,
"loss": 0.5534,
"step": 14594
},
{
"epoch": 0.92,
"grad_norm": 1.0561258792877197,
"learning_rate": 1.4808042862067496e-07,
"loss": 0.5937,
"step": 14595
},
{
"epoch": 0.92,
"grad_norm": 0.9125807881355286,
"learning_rate": 1.4783268361375058e-07,
"loss": 0.6365,
"step": 14596
},
{
"epoch": 0.92,
"grad_norm": 0.8677931427955627,
"learning_rate": 1.4758514291471238e-07,
"loss": 0.5369,
"step": 14597
},
{
"epoch": 0.92,
"grad_norm": 1.0120726823806763,
"learning_rate": 1.4733780653398254e-07,
"loss": 0.5515,
"step": 14598
},
{
"epoch": 0.92,
"grad_norm": 0.9230685234069824,
"learning_rate": 1.4709067448197722e-07,
"loss": 0.6248,
"step": 14599
},
{
"epoch": 0.92,
"grad_norm": 0.8900234699249268,
"learning_rate": 1.4684374676910197e-07,
"loss": 0.5869,
"step": 14600
},
{
"epoch": 0.93,
"grad_norm": 0.8961969614028931,
"learning_rate": 1.4659702340575287e-07,
"loss": 0.5668,
"step": 14601
},
{
"epoch": 0.93,
"grad_norm": 0.843304455280304,
"learning_rate": 1.4635050440232002e-07,
"loss": 0.5334,
"step": 14602
},
{
"epoch": 0.93,
"grad_norm": 0.8992311358451843,
"learning_rate": 1.4610418976918172e-07,
"loss": 0.5863,
"step": 14603
},
{
"epoch": 0.93,
"grad_norm": 0.9412532448768616,
"learning_rate": 1.4585807951671194e-07,
"loss": 0.5809,
"step": 14604
},
{
"epoch": 0.93,
"grad_norm": 0.8952974677085876,
"learning_rate": 1.4561217365527124e-07,
"loss": 0.6129,
"step": 14605
},
{
"epoch": 0.93,
"grad_norm": 0.9405317306518555,
"learning_rate": 1.453664721952147e-07,
"loss": 0.5574,
"step": 14606
},
{
"epoch": 0.93,
"grad_norm": 0.8412303924560547,
"learning_rate": 1.451209751468885e-07,
"loss": 0.5746,
"step": 14607
},
{
"epoch": 0.93,
"grad_norm": 0.896740734577179,
"learning_rate": 1.448756825206288e-07,
"loss": 0.5703,
"step": 14608
},
{
"epoch": 0.93,
"grad_norm": 0.9362704753875732,
"learning_rate": 1.4463059432676395e-07,
"loss": 0.618,
"step": 14609
},
{
"epoch": 0.93,
"grad_norm": 0.89310622215271,
"learning_rate": 1.4438571057561523e-07,
"loss": 0.5166,
"step": 14610
},
{
"epoch": 0.93,
"grad_norm": 0.8676783442497253,
"learning_rate": 1.4414103127749157e-07,
"loss": 0.5502,
"step": 14611
},
{
"epoch": 0.93,
"grad_norm": 0.9016738533973694,
"learning_rate": 1.4389655644269752e-07,
"loss": 0.6399,
"step": 14612
},
{
"epoch": 0.93,
"grad_norm": 0.9519631266593933,
"learning_rate": 1.4365228608152647e-07,
"loss": 0.6178,
"step": 14613
},
{
"epoch": 0.93,
"grad_norm": 0.9060442447662354,
"learning_rate": 1.4340822020426304e-07,
"loss": 0.6017,
"step": 14614
},
{
"epoch": 0.93,
"grad_norm": 0.922366201877594,
"learning_rate": 1.4316435882118563e-07,
"loss": 0.6266,
"step": 14615
},
{
"epoch": 0.93,
"grad_norm": 0.9243087768554688,
"learning_rate": 1.429207019425599e-07,
"loss": 0.5854,
"step": 14616
},
{
"epoch": 0.93,
"grad_norm": 0.9128119945526123,
"learning_rate": 1.426772495786477e-07,
"loss": 0.5616,
"step": 14617
},
{
"epoch": 0.93,
"grad_norm": 0.8875778913497925,
"learning_rate": 1.4243400173969968e-07,
"loss": 0.5322,
"step": 14618
},
{
"epoch": 0.93,
"grad_norm": 0.875525712966919,
"learning_rate": 1.4219095843595654e-07,
"loss": 0.5701,
"step": 14619
},
{
"epoch": 0.93,
"grad_norm": 0.8632004857063293,
"learning_rate": 1.4194811967765344e-07,
"loss": 0.555,
"step": 14620
},
{
"epoch": 0.93,
"grad_norm": 0.9275280237197876,
"learning_rate": 1.417054854750155e-07,
"loss": 0.5548,
"step": 14621
},
{
"epoch": 0.93,
"grad_norm": 0.893173098564148,
"learning_rate": 1.414630558382579e-07,
"loss": 0.5044,
"step": 14622
},
{
"epoch": 0.93,
"grad_norm": 0.8877639174461365,
"learning_rate": 1.4122083077759087e-07,
"loss": 0.556,
"step": 14623
},
{
"epoch": 0.93,
"grad_norm": 0.8409522771835327,
"learning_rate": 1.409788103032106e-07,
"loss": 0.5588,
"step": 14624
},
{
"epoch": 0.93,
"grad_norm": 0.8505089282989502,
"learning_rate": 1.4073699442531007e-07,
"loss": 0.5673,
"step": 14625
},
{
"epoch": 0.93,
"grad_norm": 0.865674614906311,
"learning_rate": 1.4049538315407064e-07,
"loss": 0.5732,
"step": 14626
},
{
"epoch": 0.93,
"grad_norm": 0.8754248023033142,
"learning_rate": 1.4025397649966577e-07,
"loss": 0.5596,
"step": 14627
},
{
"epoch": 0.93,
"grad_norm": 0.8468300104141235,
"learning_rate": 1.400127744722596e-07,
"loss": 0.5757,
"step": 14628
},
{
"epoch": 0.93,
"grad_norm": 0.9094893336296082,
"learning_rate": 1.3977177708200896e-07,
"loss": 0.5904,
"step": 14629
},
{
"epoch": 0.93,
"grad_norm": 0.9047295451164246,
"learning_rate": 1.395309843390613e-07,
"loss": 0.584,
"step": 14630
},
{
"epoch": 0.93,
"grad_norm": 0.8569273948669434,
"learning_rate": 1.3929039625355633e-07,
"loss": 0.5859,
"step": 14631
},
{
"epoch": 0.93,
"grad_norm": 0.9276108145713806,
"learning_rate": 1.3905001283562257e-07,
"loss": 0.5994,
"step": 14632
},
{
"epoch": 0.93,
"grad_norm": 0.8770352602005005,
"learning_rate": 1.3880983409538252e-07,
"loss": 0.562,
"step": 14633
},
{
"epoch": 0.93,
"grad_norm": 0.9754252433776855,
"learning_rate": 1.3856986004295082e-07,
"loss": 0.5636,
"step": 14634
},
{
"epoch": 0.93,
"grad_norm": 0.8698052763938904,
"learning_rate": 1.3833009068842995e-07,
"loss": 0.567,
"step": 14635
},
{
"epoch": 0.93,
"grad_norm": 0.9316123723983765,
"learning_rate": 1.3809052604191632e-07,
"loss": 0.5865,
"step": 14636
},
{
"epoch": 0.93,
"grad_norm": 0.9362663626670837,
"learning_rate": 1.3785116611349736e-07,
"loss": 0.5783,
"step": 14637
},
{
"epoch": 0.93,
"grad_norm": 0.91670161485672,
"learning_rate": 1.3761201091325172e-07,
"loss": 0.5896,
"step": 14638
},
{
"epoch": 0.93,
"grad_norm": 0.856816828250885,
"learning_rate": 1.3737306045124966e-07,
"loss": 0.6125,
"step": 14639
},
{
"epoch": 0.93,
"grad_norm": 0.9262798428535461,
"learning_rate": 1.3713431473755147e-07,
"loss": 0.5683,
"step": 14640
},
{
"epoch": 0.93,
"grad_norm": 0.9130752086639404,
"learning_rate": 1.3689577378221019e-07,
"loss": 0.6457,
"step": 14641
},
{
"epoch": 0.93,
"grad_norm": 0.8534045815467834,
"learning_rate": 1.3665743759527173e-07,
"loss": 0.5155,
"step": 14642
},
{
"epoch": 0.93,
"grad_norm": 0.9429634213447571,
"learning_rate": 1.3641930618676912e-07,
"loss": 0.5988,
"step": 14643
},
{
"epoch": 0.93,
"grad_norm": 0.9114389419555664,
"learning_rate": 1.3618137956673105e-07,
"loss": 0.5677,
"step": 14644
},
{
"epoch": 0.93,
"grad_norm": 0.9275795221328735,
"learning_rate": 1.3594365774517447e-07,
"loss": 0.5971,
"step": 14645
},
{
"epoch": 0.93,
"grad_norm": 0.9180171489715576,
"learning_rate": 1.357061407321103e-07,
"loss": 0.545,
"step": 14646
},
{
"epoch": 0.93,
"grad_norm": 0.8974061608314514,
"learning_rate": 1.3546882853753885e-07,
"loss": 0.5988,
"step": 14647
},
{
"epoch": 0.93,
"grad_norm": 0.8184780478477478,
"learning_rate": 1.3523172117145212e-07,
"loss": 0.606,
"step": 14648
},
{
"epoch": 0.93,
"grad_norm": 0.8791584968566895,
"learning_rate": 1.349948186438349e-07,
"loss": 0.5674,
"step": 14649
},
{
"epoch": 0.93,
"grad_norm": 0.900364100933075,
"learning_rate": 1.347581209646609e-07,
"loss": 0.5965,
"step": 14650
},
{
"epoch": 0.93,
"grad_norm": 0.9039656519889832,
"learning_rate": 1.3452162814389824e-07,
"loss": 0.5437,
"step": 14651
},
{
"epoch": 0.93,
"grad_norm": 0.8871658444404602,
"learning_rate": 1.342853401915034e-07,
"loss": 0.6313,
"step": 14652
},
{
"epoch": 0.93,
"grad_norm": 0.8789704442024231,
"learning_rate": 1.3404925711742734e-07,
"loss": 0.6161,
"step": 14653
},
{
"epoch": 0.93,
"grad_norm": 0.8579655885696411,
"learning_rate": 1.3381337893160818e-07,
"loss": 0.5503,
"step": 14654
},
{
"epoch": 0.93,
"grad_norm": 0.9106957316398621,
"learning_rate": 1.3357770564398075e-07,
"loss": 0.5624,
"step": 14655
},
{
"epoch": 0.93,
"grad_norm": 0.8811336159706116,
"learning_rate": 1.333422372644666e-07,
"loss": 0.5981,
"step": 14656
},
{
"epoch": 0.93,
"grad_norm": 0.8811172842979431,
"learning_rate": 1.331069738029811e-07,
"loss": 0.5696,
"step": 14657
},
{
"epoch": 0.93,
"grad_norm": 0.8447614908218384,
"learning_rate": 1.3287191526942968e-07,
"loss": 0.6139,
"step": 14658
},
{
"epoch": 0.93,
"grad_norm": 0.8404377102851868,
"learning_rate": 1.3263706167371104e-07,
"loss": 0.5405,
"step": 14659
},
{
"epoch": 0.93,
"grad_norm": 0.9460970163345337,
"learning_rate": 1.324024130257129e-07,
"loss": 0.6109,
"step": 14660
},
{
"epoch": 0.93,
"grad_norm": 0.862629234790802,
"learning_rate": 1.3216796933531672e-07,
"loss": 0.5738,
"step": 14661
},
{
"epoch": 0.93,
"grad_norm": 0.8235554695129395,
"learning_rate": 1.31933730612393e-07,
"loss": 0.598,
"step": 14662
},
{
"epoch": 0.93,
"grad_norm": 0.8776896595954895,
"learning_rate": 1.316996968668044e-07,
"loss": 0.5908,
"step": 14663
},
{
"epoch": 0.93,
"grad_norm": 0.8844308257102966,
"learning_rate": 1.3146586810840745e-07,
"loss": 0.5464,
"step": 14664
},
{
"epoch": 0.93,
"grad_norm": 0.9336026310920715,
"learning_rate": 1.312322443470454e-07,
"loss": 0.5776,
"step": 14665
},
{
"epoch": 0.93,
"grad_norm": 0.9121221303939819,
"learning_rate": 1.309988255925565e-07,
"loss": 0.5844,
"step": 14666
},
{
"epoch": 0.93,
"grad_norm": 0.9228976964950562,
"learning_rate": 1.30765611854769e-07,
"loss": 0.6109,
"step": 14667
},
{
"epoch": 0.93,
"grad_norm": 0.9101218581199646,
"learning_rate": 1.305326031435028e-07,
"loss": 0.5677,
"step": 14668
},
{
"epoch": 0.93,
"grad_norm": 0.9400882720947266,
"learning_rate": 1.3029979946856953e-07,
"loss": 0.6526,
"step": 14669
},
{
"epoch": 0.93,
"grad_norm": 0.8727139830589294,
"learning_rate": 1.3006720083977076e-07,
"loss": 0.5734,
"step": 14670
},
{
"epoch": 0.93,
"grad_norm": 0.8939454555511475,
"learning_rate": 1.2983480726690033e-07,
"loss": 0.5467,
"step": 14671
},
{
"epoch": 0.93,
"grad_norm": 0.962372899055481,
"learning_rate": 1.296026187597449e-07,
"loss": 0.5609,
"step": 14672
},
{
"epoch": 0.93,
"grad_norm": 0.8936379551887512,
"learning_rate": 1.2937063532807992e-07,
"loss": 0.5464,
"step": 14673
},
{
"epoch": 0.93,
"grad_norm": 0.8870679140090942,
"learning_rate": 1.2913885698167427e-07,
"loss": 0.5659,
"step": 14674
},
{
"epoch": 0.93,
"grad_norm": 0.901860237121582,
"learning_rate": 1.2890728373028626e-07,
"loss": 0.5578,
"step": 14675
},
{
"epoch": 0.93,
"grad_norm": 0.8647616505622864,
"learning_rate": 1.2867591558366755e-07,
"loss": 0.5257,
"step": 14676
},
{
"epoch": 0.93,
"grad_norm": 0.8566569089889526,
"learning_rate": 1.2844475255156087e-07,
"loss": 0.5479,
"step": 14677
},
{
"epoch": 0.93,
"grad_norm": 0.8648780584335327,
"learning_rate": 1.2821379464369732e-07,
"loss": 0.5547,
"step": 14678
},
{
"epoch": 0.93,
"grad_norm": 0.9167495369911194,
"learning_rate": 1.2798304186980358e-07,
"loss": 0.5968,
"step": 14679
},
{
"epoch": 0.93,
"grad_norm": 0.877565860748291,
"learning_rate": 1.277524942395958e-07,
"loss": 0.5279,
"step": 14680
},
{
"epoch": 0.93,
"grad_norm": 0.9053774476051331,
"learning_rate": 1.275221517627806e-07,
"loss": 0.5837,
"step": 14681
},
{
"epoch": 0.93,
"grad_norm": 0.8372228145599365,
"learning_rate": 1.2729201444905803e-07,
"loss": 0.5663,
"step": 14682
},
{
"epoch": 0.93,
"grad_norm": 0.8383122086524963,
"learning_rate": 1.2706208230811812e-07,
"loss": 0.5338,
"step": 14683
},
{
"epoch": 0.93,
"grad_norm": 0.8852533102035522,
"learning_rate": 1.2683235534964088e-07,
"loss": 0.6176,
"step": 14684
},
{
"epoch": 0.93,
"grad_norm": 0.9276344180107117,
"learning_rate": 1.2660283358330195e-07,
"loss": 0.5446,
"step": 14685
},
{
"epoch": 0.93,
"grad_norm": 0.9432269334793091,
"learning_rate": 1.263735170187641e-07,
"loss": 0.5843,
"step": 14686
},
{
"epoch": 0.93,
"grad_norm": 0.8667259812355042,
"learning_rate": 1.26144405665683e-07,
"loss": 0.5467,
"step": 14687
},
{
"epoch": 0.93,
"grad_norm": 0.8887820243835449,
"learning_rate": 1.2591549953370586e-07,
"loss": 0.5782,
"step": 14688
},
{
"epoch": 0.93,
"grad_norm": 0.908986508846283,
"learning_rate": 1.2568679863247168e-07,
"loss": 0.5866,
"step": 14689
},
{
"epoch": 0.93,
"grad_norm": 0.8384619951248169,
"learning_rate": 1.2545830297161e-07,
"loss": 0.5293,
"step": 14690
},
{
"epoch": 0.93,
"grad_norm": 0.8378182053565979,
"learning_rate": 1.2523001256074196e-07,
"loss": 0.5163,
"step": 14691
},
{
"epoch": 0.93,
"grad_norm": 0.9510812759399414,
"learning_rate": 1.2500192740947936e-07,
"loss": 0.5682,
"step": 14692
},
{
"epoch": 0.93,
"grad_norm": 0.8728382587432861,
"learning_rate": 1.2477404752742784e-07,
"loss": 0.5734,
"step": 14693
},
{
"epoch": 0.93,
"grad_norm": 0.9147626757621765,
"learning_rate": 1.2454637292418082e-07,
"loss": 0.573,
"step": 14694
},
{
"epoch": 0.93,
"grad_norm": 0.9553124308586121,
"learning_rate": 1.2431890360932507e-07,
"loss": 0.5857,
"step": 14695
},
{
"epoch": 0.93,
"grad_norm": 0.9232133626937866,
"learning_rate": 1.2409163959244019e-07,
"loss": 0.5843,
"step": 14696
},
{
"epoch": 0.93,
"grad_norm": 0.8695604801177979,
"learning_rate": 1.2386458088309296e-07,
"loss": 0.5436,
"step": 14697
},
{
"epoch": 0.93,
"grad_norm": 0.9066473245620728,
"learning_rate": 1.2363772749084625e-07,
"loss": 0.5383,
"step": 14698
},
{
"epoch": 0.93,
"grad_norm": 0.9268561005592346,
"learning_rate": 1.2341107942525132e-07,
"loss": 0.5507,
"step": 14699
},
{
"epoch": 0.93,
"grad_norm": 0.8040413856506348,
"learning_rate": 1.2318463669585112e-07,
"loss": 0.5478,
"step": 14700
},
{
"epoch": 0.93,
"grad_norm": 0.8859974145889282,
"learning_rate": 1.229583993121808e-07,
"loss": 0.5666,
"step": 14701
},
{
"epoch": 0.93,
"grad_norm": 0.9229069948196411,
"learning_rate": 1.2273236728376604e-07,
"loss": 0.5676,
"step": 14702
},
{
"epoch": 0.93,
"grad_norm": 0.853150486946106,
"learning_rate": 1.2250654062012478e-07,
"loss": 0.5378,
"step": 14703
},
{
"epoch": 0.93,
"grad_norm": 0.8615385890007019,
"learning_rate": 1.2228091933076613e-07,
"loss": 0.5285,
"step": 14704
},
{
"epoch": 0.93,
"grad_norm": 0.8250787854194641,
"learning_rate": 1.2205550342518803e-07,
"loss": 0.5093,
"step": 14705
},
{
"epoch": 0.93,
"grad_norm": 0.8962552547454834,
"learning_rate": 1.2183029291288452e-07,
"loss": 0.5964,
"step": 14706
},
{
"epoch": 0.93,
"grad_norm": 0.8384519815444946,
"learning_rate": 1.2160528780333803e-07,
"loss": 0.5196,
"step": 14707
},
{
"epoch": 0.93,
"grad_norm": 0.8676429986953735,
"learning_rate": 1.2138048810602154e-07,
"loss": 0.5945,
"step": 14708
},
{
"epoch": 0.93,
"grad_norm": 0.8354253172874451,
"learning_rate": 1.2115589383040083e-07,
"loss": 0.604,
"step": 14709
},
{
"epoch": 0.93,
"grad_norm": 0.9310884475708008,
"learning_rate": 1.2093150498593387e-07,
"loss": 0.5702,
"step": 14710
},
{
"epoch": 0.93,
"grad_norm": 0.878158450126648,
"learning_rate": 1.2070732158206754e-07,
"loss": 0.6138,
"step": 14711
},
{
"epoch": 0.93,
"grad_norm": 0.9077056050300598,
"learning_rate": 1.2048334362824265e-07,
"loss": 0.6086,
"step": 14712
},
{
"epoch": 0.93,
"grad_norm": 0.8881044983863831,
"learning_rate": 1.202595711338894e-07,
"loss": 0.5935,
"step": 14713
},
{
"epoch": 0.93,
"grad_norm": 0.8508563041687012,
"learning_rate": 1.2003600410842974e-07,
"loss": 0.5741,
"step": 14714
},
{
"epoch": 0.93,
"grad_norm": 0.9420803785324097,
"learning_rate": 1.1981264256127832e-07,
"loss": 0.5942,
"step": 14715
},
{
"epoch": 0.93,
"grad_norm": 0.8780478239059448,
"learning_rate": 1.1958948650183988e-07,
"loss": 0.5893,
"step": 14716
},
{
"epoch": 0.93,
"grad_norm": 0.8164428472518921,
"learning_rate": 1.1936653593950964e-07,
"loss": 0.5472,
"step": 14717
},
{
"epoch": 0.93,
"grad_norm": 0.901099681854248,
"learning_rate": 1.1914379088367677e-07,
"loss": 0.5937,
"step": 14718
},
{
"epoch": 0.93,
"grad_norm": 0.8668814897537231,
"learning_rate": 1.1892125134371935e-07,
"loss": 0.5709,
"step": 14719
},
{
"epoch": 0.93,
"grad_norm": 0.9474896788597107,
"learning_rate": 1.1869891732900762e-07,
"loss": 0.607,
"step": 14720
},
{
"epoch": 0.93,
"grad_norm": 0.9030824303627014,
"learning_rate": 1.1847678884890467e-07,
"loss": 0.5238,
"step": 14721
},
{
"epoch": 0.93,
"grad_norm": 0.81502366065979,
"learning_rate": 1.1825486591276136e-07,
"loss": 0.5206,
"step": 14722
},
{
"epoch": 0.93,
"grad_norm": 0.9036549925804138,
"learning_rate": 1.1803314852992409e-07,
"loss": 0.6143,
"step": 14723
},
{
"epoch": 0.93,
"grad_norm": 0.8753872513771057,
"learning_rate": 1.1781163670972762e-07,
"loss": 0.571,
"step": 14724
},
{
"epoch": 0.93,
"grad_norm": 0.9172664284706116,
"learning_rate": 1.1759033046149948e-07,
"loss": 0.5988,
"step": 14725
},
{
"epoch": 0.93,
"grad_norm": 0.8600670695304871,
"learning_rate": 1.1736922979455778e-07,
"loss": 0.6114,
"step": 14726
},
{
"epoch": 0.93,
"grad_norm": 0.9102545380592346,
"learning_rate": 1.1714833471821175e-07,
"loss": 0.612,
"step": 14727
},
{
"epoch": 0.93,
"grad_norm": 0.8528123497962952,
"learning_rate": 1.1692764524176337e-07,
"loss": 0.5978,
"step": 14728
},
{
"epoch": 0.93,
"grad_norm": 0.8206274509429932,
"learning_rate": 1.1670716137450577e-07,
"loss": 0.5306,
"step": 14729
},
{
"epoch": 0.93,
"grad_norm": 0.8492806553840637,
"learning_rate": 1.1648688312572099e-07,
"loss": 0.5431,
"step": 14730
},
{
"epoch": 0.93,
"grad_norm": 0.9542714357376099,
"learning_rate": 1.1626681050468492e-07,
"loss": 0.5932,
"step": 14731
},
{
"epoch": 0.93,
"grad_norm": 0.9290028810501099,
"learning_rate": 1.1604694352066459e-07,
"loss": 0.6256,
"step": 14732
},
{
"epoch": 0.93,
"grad_norm": 0.8055222034454346,
"learning_rate": 1.1582728218291761e-07,
"loss": 0.5609,
"step": 14733
},
{
"epoch": 0.93,
"grad_norm": 0.874623715877533,
"learning_rate": 1.1560782650069269e-07,
"loss": 0.5352,
"step": 14734
},
{
"epoch": 0.93,
"grad_norm": 0.9114512205123901,
"learning_rate": 1.153885764832302e-07,
"loss": 0.5984,
"step": 14735
},
{
"epoch": 0.93,
"grad_norm": 0.9033612608909607,
"learning_rate": 1.1516953213976278e-07,
"loss": 0.602,
"step": 14736
},
{
"epoch": 0.93,
"grad_norm": 0.905761182308197,
"learning_rate": 1.1495069347951416e-07,
"loss": 0.573,
"step": 14737
},
{
"epoch": 0.93,
"grad_norm": 0.9094721078872681,
"learning_rate": 1.1473206051169694e-07,
"loss": 0.6089,
"step": 14738
},
{
"epoch": 0.93,
"grad_norm": 0.875142514705658,
"learning_rate": 1.1451363324551822e-07,
"loss": 0.5662,
"step": 14739
},
{
"epoch": 0.93,
"grad_norm": 0.9052537679672241,
"learning_rate": 1.1429541169017511e-07,
"loss": 0.5498,
"step": 14740
},
{
"epoch": 0.93,
"grad_norm": 0.9127451777458191,
"learning_rate": 1.1407739585485633e-07,
"loss": 0.5891,
"step": 14741
},
{
"epoch": 0.93,
"grad_norm": 0.9252974390983582,
"learning_rate": 1.1385958574874178e-07,
"loss": 0.5736,
"step": 14742
},
{
"epoch": 0.93,
"grad_norm": 0.9759803414344788,
"learning_rate": 1.1364198138100191e-07,
"loss": 0.64,
"step": 14743
},
{
"epoch": 0.93,
"grad_norm": 1.012252926826477,
"learning_rate": 1.1342458276079937e-07,
"loss": 0.6271,
"step": 14744
},
{
"epoch": 0.93,
"grad_norm": 0.8845311403274536,
"learning_rate": 1.1320738989728963e-07,
"loss": 0.5,
"step": 14745
},
{
"epoch": 0.93,
"grad_norm": 0.7984757423400879,
"learning_rate": 1.1299040279961593e-07,
"loss": 0.516,
"step": 14746
},
{
"epoch": 0.93,
"grad_norm": 0.8785191774368286,
"learning_rate": 1.1277362147691595e-07,
"loss": 0.5672,
"step": 14747
},
{
"epoch": 0.93,
"grad_norm": 0.8724797368049622,
"learning_rate": 1.125570459383174e-07,
"loss": 0.5881,
"step": 14748
},
{
"epoch": 0.93,
"grad_norm": 0.8874450325965881,
"learning_rate": 1.1234067619293909e-07,
"loss": 0.5522,
"step": 14749
},
{
"epoch": 0.93,
"grad_norm": 0.9328641891479492,
"learning_rate": 1.1212451224989262e-07,
"loss": 0.5661,
"step": 14750
},
{
"epoch": 0.93,
"grad_norm": 0.9419904351234436,
"learning_rate": 1.1190855411827906e-07,
"loss": 0.5788,
"step": 14751
},
{
"epoch": 0.93,
"grad_norm": 0.945692241191864,
"learning_rate": 1.1169280180719111e-07,
"loss": 0.5635,
"step": 14752
},
{
"epoch": 0.93,
"grad_norm": 0.8278078436851501,
"learning_rate": 1.114772553257154e-07,
"loss": 0.5664,
"step": 14753
},
{
"epoch": 0.93,
"grad_norm": 0.8511500358581543,
"learning_rate": 1.1126191468292579e-07,
"loss": 0.5296,
"step": 14754
},
{
"epoch": 0.93,
"grad_norm": 0.921492338180542,
"learning_rate": 1.1104677988789004e-07,
"loss": 0.6163,
"step": 14755
},
{
"epoch": 0.93,
"grad_norm": 0.8180157542228699,
"learning_rate": 1.1083185094966753e-07,
"loss": 0.519,
"step": 14756
},
{
"epoch": 0.93,
"grad_norm": 0.9290024638175964,
"learning_rate": 1.1061712787730716e-07,
"loss": 0.5699,
"step": 14757
},
{
"epoch": 0.93,
"grad_norm": 0.8530245423316956,
"learning_rate": 1.1040261067985114e-07,
"loss": 0.5732,
"step": 14758
},
{
"epoch": 0.94,
"grad_norm": 0.8908236026763916,
"learning_rate": 1.1018829936633113e-07,
"loss": 0.6248,
"step": 14759
},
{
"epoch": 0.94,
"grad_norm": 0.9305959939956665,
"learning_rate": 1.0997419394577158e-07,
"loss": 0.6109,
"step": 14760
},
{
"epoch": 0.94,
"grad_norm": 0.8903090357780457,
"learning_rate": 1.0976029442718694e-07,
"loss": 0.5836,
"step": 14761
},
{
"epoch": 0.94,
"grad_norm": 0.9302195310592651,
"learning_rate": 1.0954660081958502e-07,
"loss": 0.5357,
"step": 14762
},
{
"epoch": 0.94,
"grad_norm": 0.9230768084526062,
"learning_rate": 1.0933311313196304e-07,
"loss": 0.6302,
"step": 14763
},
{
"epoch": 0.94,
"grad_norm": 0.876150906085968,
"learning_rate": 1.091198313733105e-07,
"loss": 0.5398,
"step": 14764
},
{
"epoch": 0.94,
"grad_norm": 0.8719222545623779,
"learning_rate": 1.0890675555260688e-07,
"loss": 0.5634,
"step": 14765
},
{
"epoch": 0.94,
"grad_norm": 0.8793936967849731,
"learning_rate": 1.086938856788261e-07,
"loss": 0.558,
"step": 14766
},
{
"epoch": 0.94,
"grad_norm": 0.9724277257919312,
"learning_rate": 1.0848122176092935e-07,
"loss": 0.5401,
"step": 14767
},
{
"epoch": 0.94,
"grad_norm": 0.8842143416404724,
"learning_rate": 1.0826876380787221e-07,
"loss": 0.5905,
"step": 14768
},
{
"epoch": 0.94,
"grad_norm": 0.8512188196182251,
"learning_rate": 1.0805651182860033e-07,
"loss": 0.5681,
"step": 14769
},
{
"epoch": 0.94,
"grad_norm": 0.8788979649543762,
"learning_rate": 1.0784446583205099e-07,
"loss": 0.5593,
"step": 14770
},
{
"epoch": 0.94,
"grad_norm": 0.9469314217567444,
"learning_rate": 1.0763262582715206e-07,
"loss": 0.6131,
"step": 14771
},
{
"epoch": 0.94,
"grad_norm": 0.8478160500526428,
"learning_rate": 1.0742099182282529e-07,
"loss": 0.5218,
"step": 14772
},
{
"epoch": 0.94,
"grad_norm": 0.8808424472808838,
"learning_rate": 1.0720956382797965e-07,
"loss": 0.5471,
"step": 14773
},
{
"epoch": 0.94,
"grad_norm": 0.9337725639343262,
"learning_rate": 1.0699834185151802e-07,
"loss": 0.5696,
"step": 14774
},
{
"epoch": 0.94,
"grad_norm": 0.9188077449798584,
"learning_rate": 1.0678732590233553e-07,
"loss": 0.5836,
"step": 14775
},
{
"epoch": 0.94,
"grad_norm": 0.8440690040588379,
"learning_rate": 1.0657651598931563e-07,
"loss": 0.5362,
"step": 14776
},
{
"epoch": 0.94,
"grad_norm": 0.886298418045044,
"learning_rate": 1.0636591212133673e-07,
"loss": 0.5703,
"step": 14777
},
{
"epoch": 0.94,
"grad_norm": 0.8677876591682434,
"learning_rate": 1.0615551430726456e-07,
"loss": 0.581,
"step": 14778
},
{
"epoch": 0.94,
"grad_norm": 0.8505701422691345,
"learning_rate": 1.0594532255595979e-07,
"loss": 0.554,
"step": 14779
},
{
"epoch": 0.94,
"grad_norm": 0.8900310397148132,
"learning_rate": 1.0573533687627258e-07,
"loss": 0.5679,
"step": 14780
},
{
"epoch": 0.94,
"grad_norm": 0.7801492214202881,
"learning_rate": 1.0552555727704417e-07,
"loss": 0.5483,
"step": 14781
},
{
"epoch": 0.94,
"grad_norm": 0.9094178676605225,
"learning_rate": 1.053159837671075e-07,
"loss": 0.5976,
"step": 14782
},
{
"epoch": 0.94,
"grad_norm": 0.8431292772293091,
"learning_rate": 1.0510661635528774e-07,
"loss": 0.5384,
"step": 14783
},
{
"epoch": 0.94,
"grad_norm": 0.9035899639129639,
"learning_rate": 1.0489745505040006e-07,
"loss": 0.5172,
"step": 14784
},
{
"epoch": 0.94,
"grad_norm": 0.849152147769928,
"learning_rate": 1.0468849986125185e-07,
"loss": 0.5678,
"step": 14785
},
{
"epoch": 0.94,
"grad_norm": 0.8912017941474915,
"learning_rate": 1.0447975079664163e-07,
"loss": 0.556,
"step": 14786
},
{
"epoch": 0.94,
"grad_norm": 0.87160325050354,
"learning_rate": 1.042712078653585e-07,
"loss": 0.5773,
"step": 14787
},
{
"epoch": 0.94,
"grad_norm": 0.8713656067848206,
"learning_rate": 1.0406287107618429e-07,
"loss": 0.5917,
"step": 14788
},
{
"epoch": 0.94,
"grad_norm": 0.8525165319442749,
"learning_rate": 1.0385474043789034e-07,
"loss": 0.5386,
"step": 14789
},
{
"epoch": 0.94,
"grad_norm": 0.8784705996513367,
"learning_rate": 1.0364681595924131e-07,
"loss": 0.5234,
"step": 14790
},
{
"epoch": 0.94,
"grad_norm": 0.8679460287094116,
"learning_rate": 1.034390976489913e-07,
"loss": 0.5367,
"step": 14791
},
{
"epoch": 0.94,
"grad_norm": 0.8518469929695129,
"learning_rate": 1.0323158551588663e-07,
"loss": 0.5427,
"step": 14792
},
{
"epoch": 0.94,
"grad_norm": 0.8853098750114441,
"learning_rate": 1.030242795686659e-07,
"loss": 0.5349,
"step": 14793
},
{
"epoch": 0.94,
"grad_norm": 0.9167693257331848,
"learning_rate": 1.0281717981605765e-07,
"loss": 0.5876,
"step": 14794
},
{
"epoch": 0.94,
"grad_norm": 0.8666211366653442,
"learning_rate": 1.0261028626678104e-07,
"loss": 0.491,
"step": 14795
},
{
"epoch": 0.94,
"grad_norm": 0.8543499708175659,
"learning_rate": 1.024035989295491e-07,
"loss": 0.5786,
"step": 14796
},
{
"epoch": 0.94,
"grad_norm": 0.9377774000167847,
"learning_rate": 1.0219711781306374e-07,
"loss": 0.6073,
"step": 14797
},
{
"epoch": 0.94,
"grad_norm": 0.9474400877952576,
"learning_rate": 1.0199084292602024e-07,
"loss": 0.5722,
"step": 14798
},
{
"epoch": 0.94,
"grad_norm": 0.9373930096626282,
"learning_rate": 1.0178477427710276e-07,
"loss": 0.6303,
"step": 14799
},
{
"epoch": 0.94,
"grad_norm": 0.9386447668075562,
"learning_rate": 1.015789118749888e-07,
"loss": 0.5895,
"step": 14800
},
{
"epoch": 0.94,
"grad_norm": 0.8682166337966919,
"learning_rate": 1.0137325572834644e-07,
"loss": 0.5264,
"step": 14801
},
{
"epoch": 0.94,
"grad_norm": 0.9619151949882507,
"learning_rate": 1.0116780584583596e-07,
"loss": 0.5927,
"step": 14802
},
{
"epoch": 0.94,
"grad_norm": 0.8697635531425476,
"learning_rate": 1.0096256223610657e-07,
"loss": 0.5474,
"step": 14803
},
{
"epoch": 0.94,
"grad_norm": 0.9294276833534241,
"learning_rate": 1.0075752490780133e-07,
"loss": 0.6086,
"step": 14804
},
{
"epoch": 0.94,
"grad_norm": 0.8718865513801575,
"learning_rate": 1.0055269386955391e-07,
"loss": 0.5709,
"step": 14805
},
{
"epoch": 0.94,
"grad_norm": 0.8805193305015564,
"learning_rate": 1.0034806912998796e-07,
"loss": 0.5771,
"step": 14806
},
{
"epoch": 0.94,
"grad_norm": 0.8892708420753479,
"learning_rate": 1.0014365069772102e-07,
"loss": 0.5927,
"step": 14807
},
{
"epoch": 0.94,
"grad_norm": 0.878617525100708,
"learning_rate": 9.993943858135846e-08,
"loss": 0.632,
"step": 14808
},
{
"epoch": 0.94,
"grad_norm": 0.9322656393051147,
"learning_rate": 9.973543278950115e-08,
"loss": 0.5679,
"step": 14809
},
{
"epoch": 0.94,
"grad_norm": 1.0550402402877808,
"learning_rate": 9.953163333073779e-08,
"loss": 0.6446,
"step": 14810
},
{
"epoch": 0.94,
"grad_norm": 0.8860265016555786,
"learning_rate": 9.932804021364928e-08,
"loss": 0.5912,
"step": 14811
},
{
"epoch": 0.94,
"grad_norm": 0.9326887726783752,
"learning_rate": 9.912465344680933e-08,
"loss": 0.5793,
"step": 14812
},
{
"epoch": 0.94,
"grad_norm": 0.7893259525299072,
"learning_rate": 9.892147303878108e-08,
"loss": 0.4661,
"step": 14813
},
{
"epoch": 0.94,
"grad_norm": 0.8492047786712646,
"learning_rate": 9.871849899811991e-08,
"loss": 0.5652,
"step": 14814
},
{
"epoch": 0.94,
"grad_norm": 0.9137168526649475,
"learning_rate": 9.851573133337288e-08,
"loss": 0.6163,
"step": 14815
},
{
"epoch": 0.94,
"grad_norm": 0.8527875542640686,
"learning_rate": 9.83131700530765e-08,
"loss": 0.5876,
"step": 14816
},
{
"epoch": 0.94,
"grad_norm": 0.9483136534690857,
"learning_rate": 9.81108151657617e-08,
"loss": 0.5822,
"step": 14817
},
{
"epoch": 0.94,
"grad_norm": 0.9227954149246216,
"learning_rate": 9.790866667994781e-08,
"loss": 0.589,
"step": 14818
},
{
"epoch": 0.94,
"grad_norm": 0.9560403823852539,
"learning_rate": 9.770672460414688e-08,
"loss": 0.6438,
"step": 14819
},
{
"epoch": 0.94,
"grad_norm": 0.9232782125473022,
"learning_rate": 9.750498894686156e-08,
"loss": 0.5775,
"step": 14820
},
{
"epoch": 0.94,
"grad_norm": 0.9010851383209229,
"learning_rate": 9.730345971658728e-08,
"loss": 0.5361,
"step": 14821
},
{
"epoch": 0.94,
"grad_norm": 0.8730600476264954,
"learning_rate": 9.710213692180836e-08,
"loss": 0.5925,
"step": 14822
},
{
"epoch": 0.94,
"grad_norm": 0.9010719060897827,
"learning_rate": 9.690102057100304e-08,
"loss": 0.5806,
"step": 14823
},
{
"epoch": 0.94,
"grad_norm": 0.8896587491035461,
"learning_rate": 9.670011067263896e-08,
"loss": 0.538,
"step": 14824
},
{
"epoch": 0.94,
"grad_norm": 0.8645528554916382,
"learning_rate": 9.649940723517549e-08,
"loss": 0.5407,
"step": 14825
},
{
"epoch": 0.94,
"grad_norm": 0.9243733286857605,
"learning_rate": 9.629891026706472e-08,
"loss": 0.5227,
"step": 14826
},
{
"epoch": 0.94,
"grad_norm": 0.8660601377487183,
"learning_rate": 9.609861977674773e-08,
"loss": 0.5238,
"step": 14827
},
{
"epoch": 0.94,
"grad_norm": 0.8025797605514526,
"learning_rate": 9.589853577265829e-08,
"loss": 0.5363,
"step": 14828
},
{
"epoch": 0.94,
"grad_norm": 0.8957266807556152,
"learning_rate": 9.569865826322133e-08,
"loss": 0.5925,
"step": 14829
},
{
"epoch": 0.94,
"grad_norm": 0.8849166035652161,
"learning_rate": 9.549898725685291e-08,
"loss": 0.5775,
"step": 14830
},
{
"epoch": 0.94,
"grad_norm": 0.8966931700706482,
"learning_rate": 9.52995227619613e-08,
"loss": 0.5489,
"step": 14831
},
{
"epoch": 0.94,
"grad_norm": 0.8650779724121094,
"learning_rate": 9.510026478694423e-08,
"loss": 0.5734,
"step": 14832
},
{
"epoch": 0.94,
"grad_norm": 0.799005925655365,
"learning_rate": 9.49012133401922e-08,
"loss": 0.5848,
"step": 14833
},
{
"epoch": 0.94,
"grad_norm": 0.8896704912185669,
"learning_rate": 9.47023684300863e-08,
"loss": 0.6051,
"step": 14834
},
{
"epoch": 0.94,
"grad_norm": 0.9791715145111084,
"learning_rate": 9.450373006499924e-08,
"loss": 0.5622,
"step": 14835
},
{
"epoch": 0.94,
"grad_norm": 0.9830961227416992,
"learning_rate": 9.430529825329492e-08,
"loss": 0.5459,
"step": 14836
},
{
"epoch": 0.94,
"grad_norm": 0.8603661060333252,
"learning_rate": 9.410707300333e-08,
"loss": 0.6293,
"step": 14837
},
{
"epoch": 0.94,
"grad_norm": 0.8368241190910339,
"learning_rate": 9.390905432344833e-08,
"loss": 0.5623,
"step": 14838
},
{
"epoch": 0.94,
"grad_norm": 0.9664581418037415,
"learning_rate": 9.371124222199046e-08,
"loss": 0.5745,
"step": 14839
},
{
"epoch": 0.94,
"grad_norm": 0.9014714360237122,
"learning_rate": 9.35136367072842e-08,
"loss": 0.5557,
"step": 14840
},
{
"epoch": 0.94,
"grad_norm": 0.9360548257827759,
"learning_rate": 9.331623778765009e-08,
"loss": 0.5281,
"step": 14841
},
{
"epoch": 0.94,
"grad_norm": 0.938177764415741,
"learning_rate": 9.311904547139982e-08,
"loss": 0.5604,
"step": 14842
},
{
"epoch": 0.94,
"grad_norm": 0.9227628707885742,
"learning_rate": 9.292205976683733e-08,
"loss": 0.5932,
"step": 14843
},
{
"epoch": 0.94,
"grad_norm": 0.8790847063064575,
"learning_rate": 9.272528068225595e-08,
"loss": 0.5774,
"step": 14844
},
{
"epoch": 0.94,
"grad_norm": 0.8632011413574219,
"learning_rate": 9.252870822594239e-08,
"loss": 0.574,
"step": 14845
},
{
"epoch": 0.94,
"grad_norm": 0.8468140959739685,
"learning_rate": 9.233234240617228e-08,
"loss": 0.5549,
"step": 14846
},
{
"epoch": 0.94,
"grad_norm": 0.878075897693634,
"learning_rate": 9.213618323121564e-08,
"loss": 0.589,
"step": 14847
},
{
"epoch": 0.94,
"grad_norm": 0.8827310800552368,
"learning_rate": 9.19402307093309e-08,
"loss": 0.5678,
"step": 14848
},
{
"epoch": 0.94,
"grad_norm": 0.9109396934509277,
"learning_rate": 9.174448484876864e-08,
"loss": 0.5426,
"step": 14849
},
{
"epoch": 0.94,
"grad_norm": 0.8124753832817078,
"learning_rate": 9.154894565777173e-08,
"loss": 0.486,
"step": 14850
},
{
"epoch": 0.94,
"grad_norm": 0.9251282215118408,
"learning_rate": 9.135361314457358e-08,
"loss": 0.5975,
"step": 14851
},
{
"epoch": 0.94,
"grad_norm": 0.8516286015510559,
"learning_rate": 9.115848731739874e-08,
"loss": 0.5802,
"step": 14852
},
{
"epoch": 0.94,
"grad_norm": 0.9649109244346619,
"learning_rate": 9.096356818446395e-08,
"loss": 0.6226,
"step": 14853
},
{
"epoch": 0.94,
"grad_norm": 0.8949142694473267,
"learning_rate": 9.076885575397543e-08,
"loss": 0.5577,
"step": 14854
},
{
"epoch": 0.94,
"grad_norm": 0.8870638012886047,
"learning_rate": 9.057435003413273e-08,
"loss": 0.619,
"step": 14855
},
{
"epoch": 0.94,
"grad_norm": 0.911157488822937,
"learning_rate": 9.038005103312486e-08,
"loss": 0.5763,
"step": 14856
},
{
"epoch": 0.94,
"grad_norm": 0.9567490816116333,
"learning_rate": 9.018595875913416e-08,
"loss": 0.5394,
"step": 14857
},
{
"epoch": 0.94,
"grad_norm": 0.9272708296775818,
"learning_rate": 8.999207322033299e-08,
"loss": 0.5666,
"step": 14858
},
{
"epoch": 0.94,
"grad_norm": 0.8706281781196594,
"learning_rate": 8.979839442488425e-08,
"loss": 0.5775,
"step": 14859
},
{
"epoch": 0.94,
"grad_norm": 0.8722688555717468,
"learning_rate": 8.960492238094421e-08,
"loss": 0.511,
"step": 14860
},
{
"epoch": 0.94,
"grad_norm": 0.8723371624946594,
"learning_rate": 8.941165709665966e-08,
"loss": 0.5932,
"step": 14861
},
{
"epoch": 0.94,
"grad_norm": 0.8604128360748291,
"learning_rate": 8.921859858016635e-08,
"loss": 0.5499,
"step": 14862
},
{
"epoch": 0.94,
"grad_norm": 0.9421664476394653,
"learning_rate": 8.902574683959442e-08,
"loss": 0.5585,
"step": 14863
},
{
"epoch": 0.94,
"grad_norm": 0.8716034889221191,
"learning_rate": 8.883310188306515e-08,
"loss": 0.5443,
"step": 14864
},
{
"epoch": 0.94,
"grad_norm": 0.9441227316856384,
"learning_rate": 8.864066371868873e-08,
"loss": 0.6073,
"step": 14865
},
{
"epoch": 0.94,
"grad_norm": 0.925212562084198,
"learning_rate": 8.844843235456868e-08,
"loss": 0.5865,
"step": 14866
},
{
"epoch": 0.94,
"grad_norm": 0.8931495547294617,
"learning_rate": 8.825640779879962e-08,
"loss": 0.516,
"step": 14867
},
{
"epoch": 0.94,
"grad_norm": 0.8108246326446533,
"learning_rate": 8.806459005946565e-08,
"loss": 0.5725,
"step": 14868
},
{
"epoch": 0.94,
"grad_norm": 0.92397141456604,
"learning_rate": 8.787297914464533e-08,
"loss": 0.5473,
"step": 14869
},
{
"epoch": 0.94,
"grad_norm": 0.9079901576042175,
"learning_rate": 8.768157506240494e-08,
"loss": 0.616,
"step": 14870
},
{
"epoch": 0.94,
"grad_norm": 0.9255422353744507,
"learning_rate": 8.749037782080528e-08,
"loss": 0.6009,
"step": 14871
},
{
"epoch": 0.94,
"grad_norm": 0.8876083493232727,
"learning_rate": 8.729938742789601e-08,
"loss": 0.5668,
"step": 14872
},
{
"epoch": 0.94,
"grad_norm": 0.8934264183044434,
"learning_rate": 8.71086038917196e-08,
"loss": 0.6096,
"step": 14873
},
{
"epoch": 0.94,
"grad_norm": 0.9331870079040527,
"learning_rate": 8.691802722030906e-08,
"loss": 0.5824,
"step": 14874
},
{
"epoch": 0.94,
"grad_norm": 0.8458074927330017,
"learning_rate": 8.672765742168964e-08,
"loss": 0.5393,
"step": 14875
},
{
"epoch": 0.94,
"grad_norm": 0.9242926836013794,
"learning_rate": 8.65374945038755e-08,
"loss": 0.5651,
"step": 14876
},
{
"epoch": 0.94,
"grad_norm": 0.9075822830200195,
"learning_rate": 8.634753847487575e-08,
"loss": 0.6445,
"step": 14877
},
{
"epoch": 0.94,
"grad_norm": 0.8882941603660583,
"learning_rate": 8.615778934268793e-08,
"loss": 0.6056,
"step": 14878
},
{
"epoch": 0.94,
"grad_norm": 0.8262979984283447,
"learning_rate": 8.59682471153006e-08,
"loss": 0.5354,
"step": 14879
},
{
"epoch": 0.94,
"grad_norm": 0.8280760645866394,
"learning_rate": 8.577891180069687e-08,
"loss": 0.5311,
"step": 14880
},
{
"epoch": 0.94,
"grad_norm": 0.9035456776618958,
"learning_rate": 8.558978340684642e-08,
"loss": 0.572,
"step": 14881
},
{
"epoch": 0.94,
"grad_norm": 0.8470786213874817,
"learning_rate": 8.540086194171515e-08,
"loss": 0.5889,
"step": 14882
},
{
"epoch": 0.94,
"grad_norm": 0.8594496250152588,
"learning_rate": 8.521214741325722e-08,
"loss": 0.5709,
"step": 14883
},
{
"epoch": 0.94,
"grad_norm": 0.8812367916107178,
"learning_rate": 8.502363982941797e-08,
"loss": 0.5249,
"step": 14884
},
{
"epoch": 0.94,
"grad_norm": 0.8610761165618896,
"learning_rate": 8.483533919813546e-08,
"loss": 0.5788,
"step": 14885
},
{
"epoch": 0.94,
"grad_norm": 0.882064700126648,
"learning_rate": 8.464724552733782e-08,
"loss": 0.6047,
"step": 14886
},
{
"epoch": 0.94,
"grad_norm": 0.9064013361930847,
"learning_rate": 8.445935882494593e-08,
"loss": 0.5604,
"step": 14887
},
{
"epoch": 0.94,
"grad_norm": 0.9299684166908264,
"learning_rate": 8.427167909887069e-08,
"loss": 0.5411,
"step": 14888
},
{
"epoch": 0.94,
"grad_norm": 0.9375229477882385,
"learning_rate": 8.408420635701353e-08,
"loss": 0.5626,
"step": 14889
},
{
"epoch": 0.94,
"grad_norm": 0.9137569665908813,
"learning_rate": 8.389694060726927e-08,
"loss": 0.6039,
"step": 14890
},
{
"epoch": 0.94,
"grad_norm": 0.9540867209434509,
"learning_rate": 8.370988185752383e-08,
"loss": 0.631,
"step": 14891
},
{
"epoch": 0.94,
"grad_norm": 0.8793188333511353,
"learning_rate": 8.352303011565254e-08,
"loss": 0.5763,
"step": 14892
},
{
"epoch": 0.94,
"grad_norm": 0.880684494972229,
"learning_rate": 8.333638538952305e-08,
"loss": 0.5755,
"step": 14893
},
{
"epoch": 0.94,
"grad_norm": 0.8908638954162598,
"learning_rate": 8.314994768699458e-08,
"loss": 0.5873,
"step": 14894
},
{
"epoch": 0.94,
"grad_norm": 0.9388841986656189,
"learning_rate": 8.296371701591699e-08,
"loss": 0.5329,
"step": 14895
},
{
"epoch": 0.94,
"grad_norm": 0.8881575465202332,
"learning_rate": 8.277769338413288e-08,
"loss": 0.6017,
"step": 14896
},
{
"epoch": 0.94,
"grad_norm": 0.8779671788215637,
"learning_rate": 8.259187679947434e-08,
"loss": 0.5545,
"step": 14897
},
{
"epoch": 0.94,
"grad_norm": 0.874380350112915,
"learning_rate": 8.240626726976453e-08,
"loss": 0.5587,
"step": 14898
},
{
"epoch": 0.94,
"grad_norm": 0.9045870900154114,
"learning_rate": 8.222086480282054e-08,
"loss": 0.5667,
"step": 14899
},
{
"epoch": 0.94,
"grad_norm": 0.9012387990951538,
"learning_rate": 8.20356694064478e-08,
"loss": 0.6325,
"step": 14900
},
{
"epoch": 0.94,
"grad_norm": 0.8285881280899048,
"learning_rate": 8.185068108844507e-08,
"loss": 0.5491,
"step": 14901
},
{
"epoch": 0.94,
"grad_norm": 0.8597615361213684,
"learning_rate": 8.166589985660056e-08,
"loss": 0.5429,
"step": 14902
},
{
"epoch": 0.94,
"grad_norm": 0.9608265161514282,
"learning_rate": 8.148132571869582e-08,
"loss": 0.6108,
"step": 14903
},
{
"epoch": 0.94,
"grad_norm": 0.8628665208816528,
"learning_rate": 8.129695868250242e-08,
"loss": 0.5564,
"step": 14904
},
{
"epoch": 0.94,
"grad_norm": 0.8341482877731323,
"learning_rate": 8.111279875578304e-08,
"loss": 0.5658,
"step": 14905
},
{
"epoch": 0.94,
"grad_norm": 0.8764296770095825,
"learning_rate": 8.092884594629147e-08,
"loss": 0.582,
"step": 14906
},
{
"epoch": 0.94,
"grad_norm": 0.8712512254714966,
"learning_rate": 8.074510026177485e-08,
"loss": 0.5598,
"step": 14907
},
{
"epoch": 0.94,
"grad_norm": 1.110312581062317,
"learning_rate": 8.056156170996866e-08,
"loss": 0.6074,
"step": 14908
},
{
"epoch": 0.94,
"grad_norm": 0.8867812156677246,
"learning_rate": 8.03782302986017e-08,
"loss": 0.5814,
"step": 14909
},
{
"epoch": 0.94,
"grad_norm": 0.9579918384552002,
"learning_rate": 8.019510603539338e-08,
"loss": 0.5672,
"step": 14910
},
{
"epoch": 0.94,
"grad_norm": 0.8660980463027954,
"learning_rate": 8.001218892805474e-08,
"loss": 0.569,
"step": 14911
},
{
"epoch": 0.94,
"grad_norm": 0.9106853604316711,
"learning_rate": 7.982947898428739e-08,
"loss": 0.577,
"step": 14912
},
{
"epoch": 0.94,
"grad_norm": 0.8973606824874878,
"learning_rate": 7.964697621178463e-08,
"loss": 0.623,
"step": 14913
},
{
"epoch": 0.94,
"grad_norm": 0.8993417024612427,
"learning_rate": 7.946468061823031e-08,
"loss": 0.5553,
"step": 14914
},
{
"epoch": 0.94,
"grad_norm": 0.9079226851463318,
"learning_rate": 7.928259221130163e-08,
"loss": 0.5785,
"step": 14915
},
{
"epoch": 0.95,
"grad_norm": 0.8791465759277344,
"learning_rate": 7.910071099866523e-08,
"loss": 0.6139,
"step": 14916
},
{
"epoch": 0.95,
"grad_norm": 0.8370904326438904,
"learning_rate": 7.891903698797886e-08,
"loss": 0.5087,
"step": 14917
},
{
"epoch": 0.95,
"grad_norm": 0.8958890438079834,
"learning_rate": 7.87375701868931e-08,
"loss": 0.5871,
"step": 14918
},
{
"epoch": 0.95,
"grad_norm": 0.863865315914154,
"learning_rate": 7.855631060304792e-08,
"loss": 0.5092,
"step": 14919
},
{
"epoch": 0.95,
"grad_norm": 0.8901463747024536,
"learning_rate": 7.837525824407665e-08,
"loss": 0.5633,
"step": 14920
},
{
"epoch": 0.95,
"grad_norm": 0.8937858939170837,
"learning_rate": 7.819441311760156e-08,
"loss": 0.5461,
"step": 14921
},
{
"epoch": 0.95,
"grad_norm": 0.9540120363235474,
"learning_rate": 7.801377523123877e-08,
"loss": 0.6248,
"step": 14922
},
{
"epoch": 0.95,
"grad_norm": 0.8416619300842285,
"learning_rate": 7.783334459259273e-08,
"loss": 0.5367,
"step": 14923
},
{
"epoch": 0.95,
"grad_norm": 0.9118484854698181,
"learning_rate": 7.765312120926182e-08,
"loss": 0.6225,
"step": 14924
},
{
"epoch": 0.95,
"grad_norm": 0.8478346467018127,
"learning_rate": 7.747310508883444e-08,
"loss": 0.5574,
"step": 14925
},
{
"epoch": 0.95,
"grad_norm": 0.8656757473945618,
"learning_rate": 7.729329623889114e-08,
"loss": 0.5149,
"step": 14926
},
{
"epoch": 0.95,
"grad_norm": 0.9110966920852661,
"learning_rate": 7.711369466700147e-08,
"loss": 0.6383,
"step": 14927
},
{
"epoch": 0.95,
"grad_norm": 0.8306471109390259,
"learning_rate": 7.693430038072824e-08,
"loss": 0.5397,
"step": 14928
},
{
"epoch": 0.95,
"grad_norm": 0.8142772316932678,
"learning_rate": 7.675511338762654e-08,
"loss": 0.481,
"step": 14929
},
{
"epoch": 0.95,
"grad_norm": 0.8677499890327454,
"learning_rate": 7.657613369523975e-08,
"loss": 0.5712,
"step": 14930
},
{
"epoch": 0.95,
"grad_norm": 0.8763403296470642,
"learning_rate": 7.639736131110465e-08,
"loss": 0.581,
"step": 14931
},
{
"epoch": 0.95,
"grad_norm": 1.208530068397522,
"learning_rate": 7.621879624274853e-08,
"loss": 0.5727,
"step": 14932
},
{
"epoch": 0.95,
"grad_norm": 0.8692548274993896,
"learning_rate": 7.604043849769094e-08,
"loss": 0.5459,
"step": 14933
},
{
"epoch": 0.95,
"grad_norm": 0.9086669087409973,
"learning_rate": 7.586228808344087e-08,
"loss": 0.6114,
"step": 14934
},
{
"epoch": 0.95,
"grad_norm": 0.9117394089698792,
"learning_rate": 7.56843450075001e-08,
"loss": 0.5875,
"step": 14935
},
{
"epoch": 0.95,
"grad_norm": 0.873921811580658,
"learning_rate": 7.550660927736042e-08,
"loss": 0.529,
"step": 14936
},
{
"epoch": 0.95,
"grad_norm": 0.918420135974884,
"learning_rate": 7.53290809005075e-08,
"loss": 0.5835,
"step": 14937
},
{
"epoch": 0.95,
"grad_norm": 0.8901522755622864,
"learning_rate": 7.515175988441481e-08,
"loss": 0.5254,
"step": 14938
},
{
"epoch": 0.95,
"grad_norm": 0.9384918212890625,
"learning_rate": 7.497464623654915e-08,
"loss": 0.5693,
"step": 14939
},
{
"epoch": 0.95,
"grad_norm": 0.9153959155082703,
"learning_rate": 7.479773996436845e-08,
"loss": 0.533,
"step": 14940
},
{
"epoch": 0.95,
"grad_norm": 0.8577287793159485,
"learning_rate": 7.46210410753212e-08,
"loss": 0.5409,
"step": 14941
},
{
"epoch": 0.95,
"grad_norm": 0.9197996854782104,
"learning_rate": 7.44445495768481e-08,
"loss": 0.5721,
"step": 14942
},
{
"epoch": 0.95,
"grad_norm": 0.9248746633529663,
"learning_rate": 7.426826547637989e-08,
"loss": 0.6288,
"step": 14943
},
{
"epoch": 0.95,
"grad_norm": 0.8270097374916077,
"learning_rate": 7.40921887813395e-08,
"loss": 0.565,
"step": 14944
},
{
"epoch": 0.95,
"grad_norm": 0.8829072713851929,
"learning_rate": 7.391631949914102e-08,
"loss": 0.5745,
"step": 14945
},
{
"epoch": 0.95,
"grad_norm": 0.9314706325531006,
"learning_rate": 7.374065763719018e-08,
"loss": 0.5642,
"step": 14946
},
{
"epoch": 0.95,
"grad_norm": 0.9067994356155396,
"learning_rate": 7.356520320288274e-08,
"loss": 0.6136,
"step": 14947
},
{
"epoch": 0.95,
"grad_norm": 0.8551090359687805,
"learning_rate": 7.338995620360722e-08,
"loss": 0.5157,
"step": 14948
},
{
"epoch": 0.95,
"grad_norm": 0.8279046416282654,
"learning_rate": 7.321491664674163e-08,
"loss": 0.5155,
"step": 14949
},
{
"epoch": 0.95,
"grad_norm": 0.9258044362068176,
"learning_rate": 7.304008453965727e-08,
"loss": 0.6114,
"step": 14950
},
{
"epoch": 0.95,
"grad_norm": 0.9103056192398071,
"learning_rate": 7.286545988971495e-08,
"loss": 0.5794,
"step": 14951
},
{
"epoch": 0.95,
"grad_norm": 0.885990560054779,
"learning_rate": 7.269104270426818e-08,
"loss": 0.5718,
"step": 14952
},
{
"epoch": 0.95,
"grad_norm": 0.965684175491333,
"learning_rate": 7.251683299066059e-08,
"loss": 0.5777,
"step": 14953
},
{
"epoch": 0.95,
"grad_norm": 0.9360918998718262,
"learning_rate": 7.23428307562274e-08,
"loss": 0.5961,
"step": 14954
},
{
"epoch": 0.95,
"grad_norm": 0.8759440183639526,
"learning_rate": 7.216903600829605e-08,
"loss": 0.5533,
"step": 14955
},
{
"epoch": 0.95,
"grad_norm": 0.92622309923172,
"learning_rate": 7.199544875418407e-08,
"loss": 0.6231,
"step": 14956
},
{
"epoch": 0.95,
"grad_norm": 0.9205344319343567,
"learning_rate": 7.182206900119948e-08,
"loss": 0.5824,
"step": 14957
},
{
"epoch": 0.95,
"grad_norm": 0.8723695874214172,
"learning_rate": 7.164889675664477e-08,
"loss": 0.6116,
"step": 14958
},
{
"epoch": 0.95,
"grad_norm": 0.9015873074531555,
"learning_rate": 7.147593202781022e-08,
"loss": 0.5673,
"step": 14959
},
{
"epoch": 0.95,
"grad_norm": 0.9231569170951843,
"learning_rate": 7.13031748219789e-08,
"loss": 0.5602,
"step": 14960
},
{
"epoch": 0.95,
"grad_norm": 0.8926693797111511,
"learning_rate": 7.113062514642555e-08,
"loss": 0.569,
"step": 14961
},
{
"epoch": 0.95,
"grad_norm": 0.9044926762580872,
"learning_rate": 7.095828300841435e-08,
"loss": 0.5192,
"step": 14962
},
{
"epoch": 0.95,
"grad_norm": 0.9111180901527405,
"learning_rate": 7.078614841520392e-08,
"loss": 0.6293,
"step": 14963
},
{
"epoch": 0.95,
"grad_norm": 0.8154220581054688,
"learning_rate": 7.061422137404129e-08,
"loss": 0.5807,
"step": 14964
},
{
"epoch": 0.95,
"grad_norm": 0.8674167394638062,
"learning_rate": 7.044250189216561e-08,
"loss": 0.5303,
"step": 14965
},
{
"epoch": 0.95,
"grad_norm": 0.9023363590240479,
"learning_rate": 7.027098997680726e-08,
"loss": 0.6174,
"step": 14966
},
{
"epoch": 0.95,
"grad_norm": 0.9068924784660339,
"learning_rate": 7.00996856351882e-08,
"loss": 0.5724,
"step": 14967
},
{
"epoch": 0.95,
"grad_norm": 0.9573983550071716,
"learning_rate": 6.992858887452158e-08,
"loss": 0.5502,
"step": 14968
},
{
"epoch": 0.95,
"grad_norm": 0.9019178748130798,
"learning_rate": 6.975769970201163e-08,
"loss": 0.5813,
"step": 14969
},
{
"epoch": 0.95,
"grad_norm": 0.8365936279296875,
"learning_rate": 6.958701812485369e-08,
"loss": 0.5851,
"step": 14970
},
{
"epoch": 0.95,
"grad_norm": 0.9230535626411438,
"learning_rate": 6.94165441502348e-08,
"loss": 0.5764,
"step": 14971
},
{
"epoch": 0.95,
"grad_norm": 0.9693920612335205,
"learning_rate": 6.924627778533366e-08,
"loss": 0.5714,
"step": 14972
},
{
"epoch": 0.95,
"grad_norm": 0.8915910720825195,
"learning_rate": 6.907621903731842e-08,
"loss": 0.6017,
"step": 14973
},
{
"epoch": 0.95,
"grad_norm": 0.8678449392318726,
"learning_rate": 6.890636791335003e-08,
"loss": 0.5304,
"step": 14974
},
{
"epoch": 0.95,
"grad_norm": 0.8314898014068604,
"learning_rate": 6.873672442058054e-08,
"loss": 0.5401,
"step": 14975
},
{
"epoch": 0.95,
"grad_norm": 0.893195629119873,
"learning_rate": 6.856728856615314e-08,
"loss": 0.5449,
"step": 14976
},
{
"epoch": 0.95,
"grad_norm": 0.8856935501098633,
"learning_rate": 6.839806035720209e-08,
"loss": 0.5813,
"step": 14977
},
{
"epoch": 0.95,
"grad_norm": 0.844600260257721,
"learning_rate": 6.822903980085282e-08,
"loss": 0.5913,
"step": 14978
},
{
"epoch": 0.95,
"grad_norm": 0.8763694763183594,
"learning_rate": 6.806022690422187e-08,
"loss": 0.5783,
"step": 14979
},
{
"epoch": 0.95,
"grad_norm": 0.8728997111320496,
"learning_rate": 6.789162167441798e-08,
"loss": 0.5942,
"step": 14980
},
{
"epoch": 0.95,
"grad_norm": 0.9138465523719788,
"learning_rate": 6.772322411854048e-08,
"loss": 0.6402,
"step": 14981
},
{
"epoch": 0.95,
"grad_norm": 0.9363642930984497,
"learning_rate": 6.755503424368037e-08,
"loss": 0.5904,
"step": 14982
},
{
"epoch": 0.95,
"grad_norm": 0.9248054027557373,
"learning_rate": 6.73870520569181e-08,
"loss": 0.6464,
"step": 14983
},
{
"epoch": 0.95,
"grad_norm": 0.9054921269416809,
"learning_rate": 6.721927756532853e-08,
"loss": 0.5367,
"step": 14984
},
{
"epoch": 0.95,
"grad_norm": 0.8897875547409058,
"learning_rate": 6.705171077597495e-08,
"loss": 0.5691,
"step": 14985
},
{
"epoch": 0.95,
"grad_norm": 0.9076294302940369,
"learning_rate": 6.68843516959139e-08,
"loss": 0.6368,
"step": 14986
},
{
"epoch": 0.95,
"grad_norm": 1.060864806175232,
"learning_rate": 6.67172003321903e-08,
"loss": 0.6116,
"step": 14987
},
{
"epoch": 0.95,
"grad_norm": 0.9097266793251038,
"learning_rate": 6.655025669184522e-08,
"loss": 0.6004,
"step": 14988
},
{
"epoch": 0.95,
"grad_norm": 0.8445072174072266,
"learning_rate": 6.638352078190636e-08,
"loss": 0.5223,
"step": 14989
},
{
"epoch": 0.95,
"grad_norm": 0.8733325600624084,
"learning_rate": 6.621699260939418e-08,
"loss": 0.5643,
"step": 14990
},
{
"epoch": 0.95,
"grad_norm": 0.8759425282478333,
"learning_rate": 6.605067218132145e-08,
"loss": 0.6103,
"step": 14991
},
{
"epoch": 0.95,
"grad_norm": 0.9013230800628662,
"learning_rate": 6.58845595046903e-08,
"loss": 0.643,
"step": 14992
},
{
"epoch": 0.95,
"grad_norm": 0.9331822991371155,
"learning_rate": 6.571865458649629e-08,
"loss": 0.6525,
"step": 14993
},
{
"epoch": 0.95,
"grad_norm": 0.8814842700958252,
"learning_rate": 6.555295743372492e-08,
"loss": 0.5572,
"step": 14994
},
{
"epoch": 0.95,
"grad_norm": 0.8553088903427124,
"learning_rate": 6.538746805335284e-08,
"loss": 0.5265,
"step": 14995
},
{
"epoch": 0.95,
"grad_norm": 0.9224663376808167,
"learning_rate": 6.52221864523478e-08,
"loss": 0.6197,
"step": 14996
},
{
"epoch": 0.95,
"grad_norm": 0.8610914945602417,
"learning_rate": 6.505711263766978e-08,
"loss": 0.5687,
"step": 14997
},
{
"epoch": 0.95,
"grad_norm": 0.8670297265052795,
"learning_rate": 6.48922466162688e-08,
"loss": 0.5878,
"step": 14998
},
{
"epoch": 0.95,
"grad_norm": 0.8055164217948914,
"learning_rate": 6.472758839508819e-08,
"loss": 0.553,
"step": 14999
},
{
"epoch": 0.95,
"grad_norm": 0.8382790088653564,
"learning_rate": 6.456313798105962e-08,
"loss": 0.5657,
"step": 15000
},
{
"epoch": 0.95,
"grad_norm": 0.8944666385650635,
"learning_rate": 6.439889538110867e-08,
"loss": 0.6654,
"step": 15001
},
{
"epoch": 0.95,
"grad_norm": 0.9049035310745239,
"learning_rate": 6.423486060215034e-08,
"loss": 0.5784,
"step": 15002
},
{
"epoch": 0.95,
"grad_norm": 0.8876950144767761,
"learning_rate": 6.40710336510919e-08,
"loss": 0.5315,
"step": 15003
},
{
"epoch": 0.95,
"grad_norm": 0.873111367225647,
"learning_rate": 6.390741453483119e-08,
"loss": 0.5567,
"step": 15004
},
{
"epoch": 0.95,
"grad_norm": 0.8574492931365967,
"learning_rate": 6.374400326025765e-08,
"loss": 0.5751,
"step": 15005
},
{
"epoch": 0.95,
"grad_norm": 0.9291654825210571,
"learning_rate": 6.358079983425247e-08,
"loss": 0.5736,
"step": 15006
},
{
"epoch": 0.95,
"grad_norm": 0.880135715007782,
"learning_rate": 6.341780426368737e-08,
"loss": 0.5925,
"step": 15007
},
{
"epoch": 0.95,
"grad_norm": 0.8919762372970581,
"learning_rate": 6.32550165554252e-08,
"loss": 0.5895,
"step": 15008
},
{
"epoch": 0.95,
"grad_norm": 0.9122921824455261,
"learning_rate": 6.309243671632048e-08,
"loss": 0.6224,
"step": 15009
},
{
"epoch": 0.95,
"grad_norm": 0.9773128628730774,
"learning_rate": 6.293006475321939e-08,
"loss": 0.5849,
"step": 15010
},
{
"epoch": 0.95,
"grad_norm": 0.9223852157592773,
"learning_rate": 6.276790067295813e-08,
"loss": 0.593,
"step": 15011
},
{
"epoch": 0.95,
"grad_norm": 0.8956741094589233,
"learning_rate": 6.260594448236513e-08,
"loss": 0.554,
"step": 15012
},
{
"epoch": 0.95,
"grad_norm": 0.8495054244995117,
"learning_rate": 6.244419618825992e-08,
"loss": 0.5502,
"step": 15013
},
{
"epoch": 0.95,
"grad_norm": 0.8928588628768921,
"learning_rate": 6.228265579745318e-08,
"loss": 0.5813,
"step": 15014
},
{
"epoch": 0.95,
"grad_norm": 0.8524266481399536,
"learning_rate": 6.212132331674725e-08,
"loss": 0.5767,
"step": 15015
},
{
"epoch": 0.95,
"grad_norm": 0.9605539441108704,
"learning_rate": 6.196019875293391e-08,
"loss": 0.635,
"step": 15016
},
{
"epoch": 0.95,
"grad_norm": 0.8842973709106445,
"learning_rate": 6.179928211279884e-08,
"loss": 0.5695,
"step": 15017
},
{
"epoch": 0.95,
"grad_norm": 0.9240403175354004,
"learning_rate": 6.163857340311718e-08,
"loss": 0.5168,
"step": 15018
},
{
"epoch": 0.95,
"grad_norm": 0.8997433185577393,
"learning_rate": 6.147807263065575e-08,
"loss": 0.5261,
"step": 15019
},
{
"epoch": 0.95,
"grad_norm": 0.9333794713020325,
"learning_rate": 6.131777980217302e-08,
"loss": 0.6007,
"step": 15020
},
{
"epoch": 0.95,
"grad_norm": 0.8286879062652588,
"learning_rate": 6.115769492441859e-08,
"loss": 0.5741,
"step": 15021
},
{
"epoch": 0.95,
"grad_norm": 0.9274932146072388,
"learning_rate": 6.099781800413151e-08,
"loss": 0.605,
"step": 15022
},
{
"epoch": 0.95,
"grad_norm": 0.9669122099876404,
"learning_rate": 6.083814904804586e-08,
"loss": 0.6425,
"step": 15023
},
{
"epoch": 0.95,
"grad_norm": 0.869624674320221,
"learning_rate": 6.067868806288346e-08,
"loss": 0.5806,
"step": 15024
},
{
"epoch": 0.95,
"grad_norm": 0.8654236197471619,
"learning_rate": 6.05194350553584e-08,
"loss": 0.561,
"step": 15025
},
{
"epoch": 0.95,
"grad_norm": 0.9452944397926331,
"learning_rate": 6.036039003217697e-08,
"loss": 0.5796,
"step": 15026
},
{
"epoch": 0.95,
"grad_norm": 0.9010240435600281,
"learning_rate": 6.02015530000355e-08,
"loss": 0.6307,
"step": 15027
},
{
"epoch": 0.95,
"grad_norm": 0.8794552683830261,
"learning_rate": 6.00429239656225e-08,
"loss": 0.5545,
"step": 15028
},
{
"epoch": 0.95,
"grad_norm": 0.8755041360855103,
"learning_rate": 5.988450293561765e-08,
"loss": 0.5406,
"step": 15029
},
{
"epoch": 0.95,
"grad_norm": 0.8471874594688416,
"learning_rate": 5.972628991669006e-08,
"loss": 0.5309,
"step": 15030
},
{
"epoch": 0.95,
"grad_norm": 0.9170916080474854,
"learning_rate": 5.956828491550326e-08,
"loss": 0.6034,
"step": 15031
},
{
"epoch": 0.95,
"grad_norm": 0.8616017699241638,
"learning_rate": 5.941048793870918e-08,
"loss": 0.5492,
"step": 15032
},
{
"epoch": 0.95,
"grad_norm": 0.8741750717163086,
"learning_rate": 5.92528989929525e-08,
"loss": 0.5936,
"step": 15033
},
{
"epoch": 0.95,
"grad_norm": 0.8802669048309326,
"learning_rate": 5.9095518084868467e-08,
"loss": 0.5961,
"step": 15034
},
{
"epoch": 0.95,
"grad_norm": 0.8247042298316956,
"learning_rate": 5.893834522108399e-08,
"loss": 0.5909,
"step": 15035
},
{
"epoch": 0.95,
"grad_norm": 0.9004446864128113,
"learning_rate": 5.8781380408217124e-08,
"loss": 0.5701,
"step": 15036
},
{
"epoch": 0.95,
"grad_norm": 0.8959584832191467,
"learning_rate": 5.862462365287702e-08,
"loss": 0.5698,
"step": 15037
},
{
"epoch": 0.95,
"grad_norm": 0.9272680878639221,
"learning_rate": 5.846807496166451e-08,
"loss": 0.5963,
"step": 15038
},
{
"epoch": 0.95,
"grad_norm": 0.9091727137565613,
"learning_rate": 5.831173434117043e-08,
"loss": 0.5561,
"step": 15039
},
{
"epoch": 0.95,
"grad_norm": 0.8189939260482788,
"learning_rate": 5.815560179797897e-08,
"loss": 0.5501,
"step": 15040
},
{
"epoch": 0.95,
"grad_norm": 0.8391079306602478,
"learning_rate": 5.7999677338663184e-08,
"loss": 0.5442,
"step": 15041
},
{
"epoch": 0.95,
"grad_norm": 0.8865288496017456,
"learning_rate": 5.7843960969790056e-08,
"loss": 0.5783,
"step": 15042
},
{
"epoch": 0.95,
"grad_norm": 0.8449594378471375,
"learning_rate": 5.768845269791379e-08,
"loss": 0.5728,
"step": 15043
},
{
"epoch": 0.95,
"grad_norm": 0.8410879373550415,
"learning_rate": 5.7533152529584135e-08,
"loss": 0.4878,
"step": 15044
},
{
"epoch": 0.95,
"grad_norm": 0.9583869576454163,
"learning_rate": 5.7378060471340866e-08,
"loss": 0.5291,
"step": 15045
},
{
"epoch": 0.95,
"grad_norm": 0.9091804623603821,
"learning_rate": 5.7223176529712097e-08,
"loss": 0.6207,
"step": 15046
},
{
"epoch": 0.95,
"grad_norm": 0.8123113512992859,
"learning_rate": 5.70685007112215e-08,
"loss": 0.5242,
"step": 15047
},
{
"epoch": 0.95,
"grad_norm": 0.8403564095497131,
"learning_rate": 5.691403302238052e-08,
"loss": 0.5305,
"step": 15048
},
{
"epoch": 0.95,
"grad_norm": 0.8629001379013062,
"learning_rate": 5.6759773469694523e-08,
"loss": 0.5932,
"step": 15049
},
{
"epoch": 0.95,
"grad_norm": 0.8440834283828735,
"learning_rate": 5.660572205965775e-08,
"loss": 0.5434,
"step": 15050
},
{
"epoch": 0.95,
"grad_norm": 0.9365416169166565,
"learning_rate": 5.645187879875724e-08,
"loss": 0.5669,
"step": 15051
},
{
"epoch": 0.95,
"grad_norm": 0.8103097677230835,
"learning_rate": 5.6298243693470586e-08,
"loss": 0.5747,
"step": 15052
},
{
"epoch": 0.95,
"grad_norm": 0.9661427736282349,
"learning_rate": 5.614481675026762e-08,
"loss": 0.5651,
"step": 15053
},
{
"epoch": 0.95,
"grad_norm": 0.8838450908660889,
"learning_rate": 5.59915979756076e-08,
"loss": 0.5523,
"step": 15054
},
{
"epoch": 0.95,
"grad_norm": 0.8871442675590515,
"learning_rate": 5.583858737594205e-08,
"loss": 0.5693,
"step": 15055
},
{
"epoch": 0.95,
"grad_norm": 0.9019421339035034,
"learning_rate": 5.5685784957714707e-08,
"loss": 0.5649,
"step": 15056
},
{
"epoch": 0.95,
"grad_norm": 0.8939514756202698,
"learning_rate": 5.5533190727358745e-08,
"loss": 0.595,
"step": 15057
},
{
"epoch": 0.95,
"grad_norm": 0.8858687281608582,
"learning_rate": 5.538080469129958e-08,
"loss": 0.5937,
"step": 15058
},
{
"epoch": 0.95,
"grad_norm": 0.8240897059440613,
"learning_rate": 5.522862685595376e-08,
"loss": 0.5633,
"step": 15059
},
{
"epoch": 0.95,
"grad_norm": 0.846260666847229,
"learning_rate": 5.507665722772837e-08,
"loss": 0.5337,
"step": 15060
},
{
"epoch": 0.95,
"grad_norm": 0.8530579209327698,
"learning_rate": 5.492489581302329e-08,
"loss": 0.5521,
"step": 15061
},
{
"epoch": 0.95,
"grad_norm": 0.9388294816017151,
"learning_rate": 5.477334261822842e-08,
"loss": 0.5776,
"step": 15062
},
{
"epoch": 0.95,
"grad_norm": 0.9231818914413452,
"learning_rate": 5.46219976497242e-08,
"loss": 0.555,
"step": 15063
},
{
"epoch": 0.95,
"grad_norm": 0.8340309262275696,
"learning_rate": 5.447086091388443e-08,
"loss": 0.5376,
"step": 15064
},
{
"epoch": 0.95,
"grad_norm": 0.8696257472038269,
"learning_rate": 5.4319932417072344e-08,
"loss": 0.5702,
"step": 15065
},
{
"epoch": 0.95,
"grad_norm": 0.929787814617157,
"learning_rate": 5.416921216564286e-08,
"loss": 0.6018,
"step": 15066
},
{
"epoch": 0.95,
"grad_norm": 0.8924655914306641,
"learning_rate": 5.401870016594313e-08,
"loss": 0.5825,
"step": 15067
},
{
"epoch": 0.95,
"grad_norm": 0.9113631248474121,
"learning_rate": 5.38683964243103e-08,
"loss": 0.5701,
"step": 15068
},
{
"epoch": 0.95,
"grad_norm": 0.8545387387275696,
"learning_rate": 5.3718300947072086e-08,
"loss": 0.5573,
"step": 15069
},
{
"epoch": 0.95,
"grad_norm": 0.9120925068855286,
"learning_rate": 5.356841374055011e-08,
"loss": 0.6162,
"step": 15070
},
{
"epoch": 0.95,
"grad_norm": 0.843596875667572,
"learning_rate": 5.341873481105431e-08,
"loss": 0.6176,
"step": 15071
},
{
"epoch": 0.95,
"grad_norm": 0.9411029815673828,
"learning_rate": 5.3269264164887977e-08,
"loss": 0.6389,
"step": 15072
},
{
"epoch": 0.95,
"grad_norm": 0.9116719961166382,
"learning_rate": 5.3120001808344425e-08,
"loss": 0.5561,
"step": 15073
},
{
"epoch": 0.96,
"grad_norm": 0.8383541703224182,
"learning_rate": 5.297094774770861e-08,
"loss": 0.5032,
"step": 15074
},
{
"epoch": 0.96,
"grad_norm": 0.8465346097946167,
"learning_rate": 5.282210198925664e-08,
"loss": 0.598,
"step": 15075
},
{
"epoch": 0.96,
"grad_norm": 0.9090423583984375,
"learning_rate": 5.267346453925626e-08,
"loss": 0.5912,
"step": 15076
},
{
"epoch": 0.96,
"grad_norm": 0.9705894589424133,
"learning_rate": 5.2525035403965805e-08,
"loss": 0.5754,
"step": 15077
},
{
"epoch": 0.96,
"grad_norm": 0.9257639646530151,
"learning_rate": 5.237681458963473e-08,
"loss": 0.6093,
"step": 15078
},
{
"epoch": 0.96,
"grad_norm": 0.8317881226539612,
"learning_rate": 5.222880210250469e-08,
"loss": 0.5539,
"step": 15079
},
{
"epoch": 0.96,
"grad_norm": 0.8732230067253113,
"learning_rate": 5.2080997948807944e-08,
"loss": 0.5984,
"step": 15080
},
{
"epoch": 0.96,
"grad_norm": 0.9015724658966064,
"learning_rate": 5.193340213476727e-08,
"loss": 0.5556,
"step": 15081
},
{
"epoch": 0.96,
"grad_norm": 0.912030041217804,
"learning_rate": 5.178601466659827e-08,
"loss": 0.5774,
"step": 15082
},
{
"epoch": 0.96,
"grad_norm": 0.9283223748207092,
"learning_rate": 5.163883555050708e-08,
"loss": 0.5966,
"step": 15083
},
{
"epoch": 0.96,
"grad_norm": 0.8436444997787476,
"learning_rate": 5.149186479268986e-08,
"loss": 0.5682,
"step": 15084
},
{
"epoch": 0.96,
"grad_norm": 0.9397615194320679,
"learning_rate": 5.134510239933554e-08,
"loss": 0.6182,
"step": 15085
},
{
"epoch": 0.96,
"grad_norm": 0.8505292534828186,
"learning_rate": 5.119854837662419e-08,
"loss": 0.5501,
"step": 15086
},
{
"epoch": 0.96,
"grad_norm": 0.9102478623390198,
"learning_rate": 5.1052202730725865e-08,
"loss": 0.5655,
"step": 15087
},
{
"epoch": 0.96,
"grad_norm": 0.8728495240211487,
"learning_rate": 5.0906065467803965e-08,
"loss": 0.5521,
"step": 15088
},
{
"epoch": 0.96,
"grad_norm": 0.9245344400405884,
"learning_rate": 5.0760136594010246e-08,
"loss": 0.5207,
"step": 15089
},
{
"epoch": 0.96,
"grad_norm": 0.8598072528839111,
"learning_rate": 5.061441611549034e-08,
"loss": 0.5593,
"step": 15090
},
{
"epoch": 0.96,
"grad_norm": 0.8677455186843872,
"learning_rate": 5.046890403837989e-08,
"loss": 0.5772,
"step": 15091
},
{
"epoch": 0.96,
"grad_norm": 0.8624927401542664,
"learning_rate": 5.032360036880568e-08,
"loss": 0.5621,
"step": 15092
},
{
"epoch": 0.96,
"grad_norm": 0.8490076661109924,
"learning_rate": 5.0178505112885576e-08,
"loss": 0.5548,
"step": 15093
},
{
"epoch": 0.96,
"grad_norm": 0.9380584359169006,
"learning_rate": 5.00336182767297e-08,
"loss": 0.5661,
"step": 15094
},
{
"epoch": 0.96,
"grad_norm": 0.9134517908096313,
"learning_rate": 4.988893986643817e-08,
"loss": 0.5705,
"step": 15095
},
{
"epoch": 0.96,
"grad_norm": 0.8528224229812622,
"learning_rate": 4.9744469888103887e-08,
"loss": 0.6008,
"step": 15096
},
{
"epoch": 0.96,
"grad_norm": 0.8432444930076599,
"learning_rate": 4.9600208347809206e-08,
"loss": 0.5205,
"step": 15097
},
{
"epoch": 0.96,
"grad_norm": 0.9236396551132202,
"learning_rate": 4.945615525162761e-08,
"loss": 0.594,
"step": 15098
},
{
"epoch": 0.96,
"grad_norm": 0.8537143468856812,
"learning_rate": 4.931231060562702e-08,
"loss": 0.5326,
"step": 15099
},
{
"epoch": 0.96,
"grad_norm": 0.929633378982544,
"learning_rate": 4.916867441586204e-08,
"loss": 0.637,
"step": 15100
},
{
"epoch": 0.96,
"grad_norm": 0.8501139283180237,
"learning_rate": 4.902524668838116e-08,
"loss": 0.5492,
"step": 15101
},
{
"epoch": 0.96,
"grad_norm": 0.8739571571350098,
"learning_rate": 4.88820274292251e-08,
"loss": 0.5558,
"step": 15102
},
{
"epoch": 0.96,
"grad_norm": 0.8676999807357788,
"learning_rate": 4.873901664442182e-08,
"loss": 0.5524,
"step": 15103
},
{
"epoch": 0.96,
"grad_norm": 0.864040195941925,
"learning_rate": 4.8596214339995395e-08,
"loss": 0.5512,
"step": 15104
},
{
"epoch": 0.96,
"grad_norm": 0.9180824160575867,
"learning_rate": 4.8453620521957124e-08,
"loss": 0.571,
"step": 15105
},
{
"epoch": 0.96,
"grad_norm": 0.9425962567329407,
"learning_rate": 4.83112351963122e-08,
"loss": 0.5272,
"step": 15106
},
{
"epoch": 0.96,
"grad_norm": 0.9564583897590637,
"learning_rate": 4.816905836905528e-08,
"loss": 0.5995,
"step": 15107
},
{
"epoch": 0.96,
"grad_norm": 0.8326453566551208,
"learning_rate": 4.802709004617267e-08,
"loss": 0.5695,
"step": 15108
},
{
"epoch": 0.96,
"grad_norm": 0.8696445822715759,
"learning_rate": 4.788533023364295e-08,
"loss": 0.5204,
"step": 15109
},
{
"epoch": 0.96,
"grad_norm": 0.9089856743812561,
"learning_rate": 4.77437789374352e-08,
"loss": 0.6211,
"step": 15110
},
{
"epoch": 0.96,
"grad_norm": 0.9463760256767273,
"learning_rate": 4.760243616350913e-08,
"loss": 0.5978,
"step": 15111
},
{
"epoch": 0.96,
"grad_norm": 0.8912383913993835,
"learning_rate": 4.746130191781606e-08,
"loss": 0.5724,
"step": 15112
},
{
"epoch": 0.96,
"grad_norm": 0.8581725358963013,
"learning_rate": 4.7320376206299034e-08,
"loss": 0.5992,
"step": 15113
},
{
"epoch": 0.96,
"grad_norm": 0.878282368183136,
"learning_rate": 4.717965903489219e-08,
"loss": 0.5798,
"step": 15114
},
{
"epoch": 0.96,
"grad_norm": 0.866300642490387,
"learning_rate": 4.7039150409519674e-08,
"loss": 0.5509,
"step": 15115
},
{
"epoch": 0.96,
"grad_norm": 0.8967102766036987,
"learning_rate": 4.6898850336098975e-08,
"loss": 0.5595,
"step": 15116
},
{
"epoch": 0.96,
"grad_norm": 0.8599669337272644,
"learning_rate": 4.675875882053704e-08,
"loss": 0.5854,
"step": 15117
},
{
"epoch": 0.96,
"grad_norm": 0.8142878413200378,
"learning_rate": 4.6618875868733037e-08,
"loss": 0.5502,
"step": 15118
},
{
"epoch": 0.96,
"grad_norm": 0.8852924704551697,
"learning_rate": 4.6479201486575585e-08,
"loss": 0.54,
"step": 15119
},
{
"epoch": 0.96,
"grad_norm": 0.8815605044364929,
"learning_rate": 4.633973567994776e-08,
"loss": 0.6029,
"step": 15120
},
{
"epoch": 0.96,
"grad_norm": 0.8725368976593018,
"learning_rate": 4.620047845472098e-08,
"loss": 0.576,
"step": 15121
},
{
"epoch": 0.96,
"grad_norm": 0.8406761288642883,
"learning_rate": 4.606142981675887e-08,
"loss": 0.5832,
"step": 15122
},
{
"epoch": 0.96,
"grad_norm": 0.9023706316947937,
"learning_rate": 4.592258977191622e-08,
"loss": 0.5905,
"step": 15123
},
{
"epoch": 0.96,
"grad_norm": 0.8814811706542969,
"learning_rate": 4.578395832603999e-08,
"loss": 0.567,
"step": 15124
},
{
"epoch": 0.96,
"grad_norm": 0.9129397869110107,
"learning_rate": 4.5645535484966085e-08,
"loss": 0.5647,
"step": 15125
},
{
"epoch": 0.96,
"grad_norm": 0.9053840637207031,
"learning_rate": 4.5507321254524287e-08,
"loss": 0.561,
"step": 15126
},
{
"epoch": 0.96,
"grad_norm": 0.9199474453926086,
"learning_rate": 4.536931564053382e-08,
"loss": 0.5823,
"step": 15127
},
{
"epoch": 0.96,
"grad_norm": 0.9460538625717163,
"learning_rate": 4.523151864880504e-08,
"loss": 0.5639,
"step": 15128
},
{
"epoch": 0.96,
"grad_norm": 0.9503843188285828,
"learning_rate": 4.5093930285141086e-08,
"loss": 0.5857,
"step": 15129
},
{
"epoch": 0.96,
"grad_norm": 0.8704271912574768,
"learning_rate": 4.4956550555334546e-08,
"loss": 0.5987,
"step": 15130
},
{
"epoch": 0.96,
"grad_norm": 0.8602560758590698,
"learning_rate": 4.4819379465170785e-08,
"loss": 0.548,
"step": 15131
},
{
"epoch": 0.96,
"grad_norm": 0.9668799638748169,
"learning_rate": 4.4682417020425194e-08,
"loss": 0.5793,
"step": 15132
},
{
"epoch": 0.96,
"grad_norm": 0.9470771551132202,
"learning_rate": 4.454566322686371e-08,
"loss": 0.6333,
"step": 15133
},
{
"epoch": 0.96,
"grad_norm": 0.8778170943260193,
"learning_rate": 4.440911809024673e-08,
"loss": 0.5797,
"step": 15134
},
{
"epoch": 0.96,
"grad_norm": 0.9537930488586426,
"learning_rate": 4.427278161632187e-08,
"loss": 0.5665,
"step": 15135
},
{
"epoch": 0.96,
"grad_norm": 0.860418975353241,
"learning_rate": 4.413665381083065e-08,
"loss": 0.5036,
"step": 15136
},
{
"epoch": 0.96,
"grad_norm": 0.8717173933982849,
"learning_rate": 4.4000734679504606e-08,
"loss": 0.54,
"step": 15137
},
{
"epoch": 0.96,
"grad_norm": 0.8939148187637329,
"learning_rate": 4.386502422806749e-08,
"loss": 0.5823,
"step": 15138
},
{
"epoch": 0.96,
"grad_norm": 0.8451435565948486,
"learning_rate": 4.37295224622325e-08,
"loss": 0.5591,
"step": 15139
},
{
"epoch": 0.96,
"grad_norm": 0.8331038951873779,
"learning_rate": 4.359422938770619e-08,
"loss": 0.5757,
"step": 15140
},
{
"epoch": 0.96,
"grad_norm": 0.9195752739906311,
"learning_rate": 4.3459145010184e-08,
"loss": 0.5171,
"step": 15141
},
{
"epoch": 0.96,
"grad_norm": 0.9011934995651245,
"learning_rate": 4.3324269335355274e-08,
"loss": 0.6086,
"step": 15142
},
{
"epoch": 0.96,
"grad_norm": 0.929836094379425,
"learning_rate": 4.31896023688988e-08,
"loss": 0.5732,
"step": 15143
},
{
"epoch": 0.96,
"grad_norm": 0.9297063946723938,
"learning_rate": 4.305514411648393e-08,
"loss": 0.5652,
"step": 15144
},
{
"epoch": 0.96,
"grad_norm": 0.8522423505783081,
"learning_rate": 4.2920894583773906e-08,
"loss": 0.5228,
"step": 15145
},
{
"epoch": 0.96,
"grad_norm": 0.8246431350708008,
"learning_rate": 4.278685377641978e-08,
"loss": 0.5246,
"step": 15146
},
{
"epoch": 0.96,
"grad_norm": 0.918134868144989,
"learning_rate": 4.2653021700066466e-08,
"loss": 0.5447,
"step": 15147
},
{
"epoch": 0.96,
"grad_norm": 0.9000471830368042,
"learning_rate": 4.251939836034946e-08,
"loss": 0.5654,
"step": 15148
},
{
"epoch": 0.96,
"grad_norm": 0.8861368298530579,
"learning_rate": 4.238598376289482e-08,
"loss": 0.5441,
"step": 15149
},
{
"epoch": 0.96,
"grad_norm": 0.8738230466842651,
"learning_rate": 4.225277791331972e-08,
"loss": 0.5582,
"step": 15150
},
{
"epoch": 0.96,
"grad_norm": 0.8639594316482544,
"learning_rate": 4.211978081723356e-08,
"loss": 0.5699,
"step": 15151
},
{
"epoch": 0.96,
"grad_norm": 0.855905294418335,
"learning_rate": 4.19869924802363e-08,
"loss": 0.5731,
"step": 15152
},
{
"epoch": 0.96,
"grad_norm": 0.8283065557479858,
"learning_rate": 4.185441290791903e-08,
"loss": 0.5216,
"step": 15153
},
{
"epoch": 0.96,
"grad_norm": 0.9064654111862183,
"learning_rate": 4.1722042105863946e-08,
"loss": 0.593,
"step": 15154
},
{
"epoch": 0.96,
"grad_norm": 0.9007862210273743,
"learning_rate": 4.158988007964548e-08,
"loss": 0.5809,
"step": 15155
},
{
"epoch": 0.96,
"grad_norm": 0.8880447149276733,
"learning_rate": 4.145792683482808e-08,
"loss": 0.5822,
"step": 15156
},
{
"epoch": 0.96,
"grad_norm": 0.8769849538803101,
"learning_rate": 4.132618237696784e-08,
"loss": 0.6075,
"step": 15157
},
{
"epoch": 0.96,
"grad_norm": 0.9147706627845764,
"learning_rate": 4.1194646711612555e-08,
"loss": 0.6183,
"step": 15158
},
{
"epoch": 0.96,
"grad_norm": 0.9345024824142456,
"learning_rate": 4.1063319844299454e-08,
"loss": 0.5518,
"step": 15159
},
{
"epoch": 0.96,
"grad_norm": 0.8593862652778625,
"learning_rate": 4.0932201780559674e-08,
"loss": 0.5756,
"step": 15160
},
{
"epoch": 0.96,
"grad_norm": 0.8958163857460022,
"learning_rate": 4.080129252591325e-08,
"loss": 0.5601,
"step": 15161
},
{
"epoch": 0.96,
"grad_norm": 0.8987419009208679,
"learning_rate": 4.0670592085872984e-08,
"loss": 0.5828,
"step": 15162
},
{
"epoch": 0.96,
"grad_norm": 0.8477271795272827,
"learning_rate": 4.054010046594115e-08,
"loss": 0.5592,
"step": 15163
},
{
"epoch": 0.96,
"grad_norm": 0.8878704309463501,
"learning_rate": 4.040981767161334e-08,
"loss": 0.5865,
"step": 15164
},
{
"epoch": 0.96,
"grad_norm": 0.8833525776863098,
"learning_rate": 4.027974370837518e-08,
"loss": 0.5872,
"step": 15165
},
{
"epoch": 0.96,
"grad_norm": 0.8798415064811707,
"learning_rate": 4.014987858170283e-08,
"loss": 0.5793,
"step": 15166
},
{
"epoch": 0.96,
"grad_norm": 0.8760266900062561,
"learning_rate": 4.0020222297065256e-08,
"loss": 0.5665,
"step": 15167
},
{
"epoch": 0.96,
"grad_norm": 0.9930770993232727,
"learning_rate": 3.9890774859921987e-08,
"loss": 0.591,
"step": 15168
},
{
"epoch": 0.96,
"grad_norm": 0.9291293621063232,
"learning_rate": 3.97615362757231e-08,
"loss": 0.6082,
"step": 15169
},
{
"epoch": 0.96,
"grad_norm": 0.9287815093994141,
"learning_rate": 3.9632506549910356e-08,
"loss": 0.5971,
"step": 15170
},
{
"epoch": 0.96,
"grad_norm": 0.9018691778182983,
"learning_rate": 3.9503685687916627e-08,
"loss": 0.5316,
"step": 15171
},
{
"epoch": 0.96,
"grad_norm": 0.8846923112869263,
"learning_rate": 3.937507369516702e-08,
"loss": 0.5959,
"step": 15172
},
{
"epoch": 0.96,
"grad_norm": 0.9421688914299011,
"learning_rate": 3.92466705770761e-08,
"loss": 0.5618,
"step": 15173
},
{
"epoch": 0.96,
"grad_norm": 0.8841253519058228,
"learning_rate": 3.911847633905008e-08,
"loss": 0.5895,
"step": 15174
},
{
"epoch": 0.96,
"grad_norm": 0.9070528149604797,
"learning_rate": 3.899049098648799e-08,
"loss": 0.5664,
"step": 15175
},
{
"epoch": 0.96,
"grad_norm": 0.8911782503128052,
"learning_rate": 3.88627145247783e-08,
"loss": 0.5772,
"step": 15176
},
{
"epoch": 0.96,
"grad_norm": 0.9055638909339905,
"learning_rate": 3.873514695930114e-08,
"loss": 0.6169,
"step": 15177
},
{
"epoch": 0.96,
"grad_norm": 0.915357232093811,
"learning_rate": 3.860778829542777e-08,
"loss": 0.6026,
"step": 15178
},
{
"epoch": 0.96,
"grad_norm": 0.9006307721138,
"learning_rate": 3.848063853852113e-08,
"loss": 0.5744,
"step": 15179
},
{
"epoch": 0.96,
"grad_norm": 0.845581591129303,
"learning_rate": 3.835369769393471e-08,
"loss": 0.5451,
"step": 15180
},
{
"epoch": 0.96,
"grad_norm": 0.8819062113761902,
"learning_rate": 3.822696576701368e-08,
"loss": 0.5855,
"step": 15181
},
{
"epoch": 0.96,
"grad_norm": 0.8674046993255615,
"learning_rate": 3.8100442763094324e-08,
"loss": 0.5511,
"step": 15182
},
{
"epoch": 0.96,
"grad_norm": 0.8748277425765991,
"learning_rate": 3.797412868750461e-08,
"loss": 0.5703,
"step": 15183
},
{
"epoch": 0.96,
"grad_norm": 0.9121760129928589,
"learning_rate": 3.784802354556249e-08,
"loss": 0.5536,
"step": 15184
},
{
"epoch": 0.96,
"grad_norm": 0.9617106318473816,
"learning_rate": 3.7722127342578183e-08,
"loss": 0.5778,
"step": 15185
},
{
"epoch": 0.96,
"grad_norm": 0.9024949073791504,
"learning_rate": 3.759644008385244e-08,
"loss": 0.5615,
"step": 15186
},
{
"epoch": 0.96,
"grad_norm": 0.9101724028587341,
"learning_rate": 3.747096177467768e-08,
"loss": 0.5541,
"step": 15187
},
{
"epoch": 0.96,
"grad_norm": 0.8800556659698486,
"learning_rate": 3.7345692420337476e-08,
"loss": 0.535,
"step": 15188
},
{
"epoch": 0.96,
"grad_norm": 0.9029918909072876,
"learning_rate": 3.722063202610593e-08,
"loss": 0.591,
"step": 15189
},
{
"epoch": 0.96,
"grad_norm": 0.9296280145645142,
"learning_rate": 3.709578059724939e-08,
"loss": 0.5728,
"step": 15190
},
{
"epoch": 0.96,
"grad_norm": 0.865096390247345,
"learning_rate": 3.697113813902531e-08,
"loss": 0.564,
"step": 15191
},
{
"epoch": 0.96,
"grad_norm": 0.9005051851272583,
"learning_rate": 3.684670465668116e-08,
"loss": 0.6006,
"step": 15192
},
{
"epoch": 0.96,
"grad_norm": 0.8931210041046143,
"learning_rate": 3.6722480155456655e-08,
"loss": 0.5469,
"step": 15193
},
{
"epoch": 0.96,
"grad_norm": 0.8682152032852173,
"learning_rate": 3.6598464640582586e-08,
"loss": 0.5758,
"step": 15194
},
{
"epoch": 0.96,
"grad_norm": 0.9252963662147522,
"learning_rate": 3.64746581172809e-08,
"loss": 0.6333,
"step": 15195
},
{
"epoch": 0.96,
"grad_norm": 0.9037860035896301,
"learning_rate": 3.6351060590764656e-08,
"loss": 0.567,
"step": 15196
},
{
"epoch": 0.96,
"grad_norm": 0.827499270439148,
"learning_rate": 3.6227672066237454e-08,
"loss": 0.5443,
"step": 15197
},
{
"epoch": 0.96,
"grad_norm": 0.8574694395065308,
"learning_rate": 3.6104492548895695e-08,
"loss": 0.6125,
"step": 15198
},
{
"epoch": 0.96,
"grad_norm": 0.8817412853240967,
"learning_rate": 3.5981522043925796e-08,
"loss": 0.5583,
"step": 15199
},
{
"epoch": 0.96,
"grad_norm": 0.8701195120811462,
"learning_rate": 3.585876055650528e-08,
"loss": 0.5599,
"step": 15200
},
{
"epoch": 0.96,
"grad_norm": 0.8706973195075989,
"learning_rate": 3.5736208091802784e-08,
"loss": 0.5572,
"step": 15201
},
{
"epoch": 0.96,
"grad_norm": 0.9477795958518982,
"learning_rate": 3.5613864654979734e-08,
"loss": 0.6371,
"step": 15202
},
{
"epoch": 0.96,
"grad_norm": 0.9767326712608337,
"learning_rate": 3.5491730251187016e-08,
"loss": 0.5926,
"step": 15203
},
{
"epoch": 0.96,
"grad_norm": 0.8724082708358765,
"learning_rate": 3.5369804885567185e-08,
"loss": 0.5583,
"step": 15204
},
{
"epoch": 0.96,
"grad_norm": 0.868426501750946,
"learning_rate": 3.52480885632539e-08,
"loss": 0.5689,
"step": 15205
},
{
"epoch": 0.96,
"grad_norm": 0.9158200621604919,
"learning_rate": 3.512658128937252e-08,
"loss": 0.5782,
"step": 15206
},
{
"epoch": 0.96,
"grad_norm": 0.889900267124176,
"learning_rate": 3.500528306904005e-08,
"loss": 0.5747,
"step": 15207
},
{
"epoch": 0.96,
"grad_norm": 0.9287896752357483,
"learning_rate": 3.488419390736242e-08,
"loss": 0.5598,
"step": 15208
},
{
"epoch": 0.96,
"grad_norm": 0.9180722236633301,
"learning_rate": 3.476331380943887e-08,
"loss": 0.6264,
"step": 15209
},
{
"epoch": 0.96,
"grad_norm": 0.9249047040939331,
"learning_rate": 3.464264278035978e-08,
"loss": 0.5909,
"step": 15210
},
{
"epoch": 0.96,
"grad_norm": 0.8158274292945862,
"learning_rate": 3.452218082520553e-08,
"loss": 0.5317,
"step": 15211
},
{
"epoch": 0.96,
"grad_norm": 0.8706426620483398,
"learning_rate": 3.440192794904873e-08,
"loss": 0.547,
"step": 15212
},
{
"epoch": 0.96,
"grad_norm": 0.8472093343734741,
"learning_rate": 3.4281884156953106e-08,
"loss": 0.5676,
"step": 15213
},
{
"epoch": 0.96,
"grad_norm": 0.8854379057884216,
"learning_rate": 3.416204945397239e-08,
"loss": 0.5735,
"step": 15214
},
{
"epoch": 0.96,
"grad_norm": 0.8368361592292786,
"learning_rate": 3.4042423845153104e-08,
"loss": 0.5672,
"step": 15215
},
{
"epoch": 0.96,
"grad_norm": 0.8709746599197388,
"learning_rate": 3.392300733553178e-08,
"loss": 0.573,
"step": 15216
},
{
"epoch": 0.96,
"grad_norm": 0.9157810807228088,
"learning_rate": 3.380379993013716e-08,
"loss": 0.6188,
"step": 15217
},
{
"epoch": 0.96,
"grad_norm": 0.9439373016357422,
"learning_rate": 3.368480163398802e-08,
"loss": 0.5568,
"step": 15218
},
{
"epoch": 0.96,
"grad_norm": 0.8551452159881592,
"learning_rate": 3.356601245209534e-08,
"loss": 0.5708,
"step": 15219
},
{
"epoch": 0.96,
"grad_norm": 0.9316923022270203,
"learning_rate": 3.344743238946124e-08,
"loss": 0.5438,
"step": 15220
},
{
"epoch": 0.96,
"grad_norm": 0.912805438041687,
"learning_rate": 3.332906145107839e-08,
"loss": 0.5485,
"step": 15221
},
{
"epoch": 0.96,
"grad_norm": 0.8899156451225281,
"learning_rate": 3.3210899641930586e-08,
"loss": 0.5284,
"step": 15222
},
{
"epoch": 0.96,
"grad_norm": 0.9470401406288147,
"learning_rate": 3.3092946966994385e-08,
"loss": 0.6202,
"step": 15223
},
{
"epoch": 0.96,
"grad_norm": 0.8981837630271912,
"learning_rate": 3.297520343123473e-08,
"loss": 0.5932,
"step": 15224
},
{
"epoch": 0.96,
"grad_norm": 0.8313995003700256,
"learning_rate": 3.285766903961096e-08,
"loss": 0.5378,
"step": 15225
},
{
"epoch": 0.96,
"grad_norm": 0.9461470246315002,
"learning_rate": 3.274034379707081e-08,
"loss": 0.6186,
"step": 15226
},
{
"epoch": 0.96,
"grad_norm": 0.8666161298751831,
"learning_rate": 3.262322770855475e-08,
"loss": 0.5918,
"step": 15227
},
{
"epoch": 0.96,
"grad_norm": 0.9340410828590393,
"learning_rate": 3.250632077899496e-08,
"loss": 0.6198,
"step": 15228
},
{
"epoch": 0.96,
"grad_norm": 0.9431737661361694,
"learning_rate": 3.238962301331305e-08,
"loss": 0.6251,
"step": 15229
},
{
"epoch": 0.96,
"grad_norm": 0.9066559076309204,
"learning_rate": 3.227313441642288e-08,
"loss": 0.6061,
"step": 15230
},
{
"epoch": 0.96,
"grad_norm": 0.9017807245254517,
"learning_rate": 3.2156854993229955e-08,
"loss": 0.5255,
"step": 15231
},
{
"epoch": 0.97,
"grad_norm": 0.8806298971176147,
"learning_rate": 3.2040784748629814e-08,
"loss": 0.5691,
"step": 15232
},
{
"epoch": 0.97,
"grad_norm": 0.9558776617050171,
"learning_rate": 3.192492368750966e-08,
"loss": 0.5747,
"step": 15233
},
{
"epoch": 0.97,
"grad_norm": 0.9031001329421997,
"learning_rate": 3.180927181474891e-08,
"loss": 0.6138,
"step": 15234
},
{
"epoch": 0.97,
"grad_norm": 0.9222975373268127,
"learning_rate": 3.16938291352159e-08,
"loss": 0.5767,
"step": 15235
},
{
"epoch": 0.97,
"grad_norm": 0.8207947611808777,
"learning_rate": 3.157859565377286e-08,
"loss": 0.5334,
"step": 15236
},
{
"epoch": 0.97,
"grad_norm": 0.8401099443435669,
"learning_rate": 3.146357137527145e-08,
"loss": 0.5246,
"step": 15237
},
{
"epoch": 0.97,
"grad_norm": 0.8946247100830078,
"learning_rate": 3.1348756304554475e-08,
"loss": 0.5931,
"step": 15238
},
{
"epoch": 0.97,
"grad_norm": 0.877619743347168,
"learning_rate": 3.123415044645639e-08,
"loss": 0.5706,
"step": 15239
},
{
"epoch": 0.97,
"grad_norm": 0.865767240524292,
"learning_rate": 3.111975380580334e-08,
"loss": 0.5536,
"step": 15240
},
{
"epoch": 0.97,
"grad_norm": 0.8900549411773682,
"learning_rate": 3.100556638741203e-08,
"loss": 0.6122,
"step": 15241
},
{
"epoch": 0.97,
"grad_norm": 0.8810964822769165,
"learning_rate": 3.089158819609084e-08,
"loss": 0.5664,
"step": 15242
},
{
"epoch": 0.97,
"grad_norm": 0.8617457747459412,
"learning_rate": 3.077781923663814e-08,
"loss": 0.604,
"step": 15243
},
{
"epoch": 0.97,
"grad_norm": 0.8632597923278809,
"learning_rate": 3.066425951384455e-08,
"loss": 0.4994,
"step": 15244
},
{
"epoch": 0.97,
"grad_norm": 0.8872633576393127,
"learning_rate": 3.055090903249236e-08,
"loss": 0.6185,
"step": 15245
},
{
"epoch": 0.97,
"grad_norm": 0.8761091828346252,
"learning_rate": 3.0437767797353856e-08,
"loss": 0.5448,
"step": 15246
},
{
"epoch": 0.97,
"grad_norm": 0.8925213813781738,
"learning_rate": 3.032483581319301e-08,
"loss": 0.5856,
"step": 15247
},
{
"epoch": 0.97,
"grad_norm": 0.9047413468360901,
"learning_rate": 3.021211308476546e-08,
"loss": 0.5834,
"step": 15248
},
{
"epoch": 0.97,
"grad_norm": 0.8721828460693359,
"learning_rate": 3.0099599616816856e-08,
"loss": 0.571,
"step": 15249
},
{
"epoch": 0.97,
"grad_norm": 0.9577370882034302,
"learning_rate": 2.998729541408507e-08,
"loss": 0.5948,
"step": 15250
},
{
"epoch": 0.97,
"grad_norm": 0.9254369139671326,
"learning_rate": 2.987520048129911e-08,
"loss": 0.5841,
"step": 15251
},
{
"epoch": 0.97,
"grad_norm": 0.8802624940872192,
"learning_rate": 2.976331482317796e-08,
"loss": 0.5672,
"step": 15252
},
{
"epoch": 0.97,
"grad_norm": 0.8794838786125183,
"learning_rate": 2.9651638444434528e-08,
"loss": 0.6076,
"step": 15253
},
{
"epoch": 0.97,
"grad_norm": 0.8821896910667419,
"learning_rate": 2.9540171349769497e-08,
"loss": 0.5937,
"step": 15254
},
{
"epoch": 0.97,
"grad_norm": 0.8732861876487732,
"learning_rate": 2.942891354387689e-08,
"loss": 0.5617,
"step": 15255
},
{
"epoch": 0.97,
"grad_norm": 0.9027414321899414,
"learning_rate": 2.9317865031441295e-08,
"loss": 0.5962,
"step": 15256
},
{
"epoch": 0.97,
"grad_norm": 0.9479333162307739,
"learning_rate": 2.920702581713841e-08,
"loss": 0.5659,
"step": 15257
},
{
"epoch": 0.97,
"grad_norm": 0.9096110463142395,
"learning_rate": 2.909639590563562e-08,
"loss": 0.6119,
"step": 15258
},
{
"epoch": 0.97,
"grad_norm": 0.8687134981155396,
"learning_rate": 2.8985975301591975e-08,
"loss": 0.5714,
"step": 15259
},
{
"epoch": 0.97,
"grad_norm": 0.8929232358932495,
"learning_rate": 2.887576400965486e-08,
"loss": 0.5406,
"step": 15260
},
{
"epoch": 0.97,
"grad_norm": 0.8353814482688904,
"learning_rate": 2.8765762034466682e-08,
"loss": 0.5534,
"step": 15261
},
{
"epoch": 0.97,
"grad_norm": 0.8765125274658203,
"learning_rate": 2.8655969380658177e-08,
"loss": 0.5562,
"step": 15262
},
{
"epoch": 0.97,
"grad_norm": 0.9010085463523865,
"learning_rate": 2.8546386052853427e-08,
"loss": 0.5473,
"step": 15263
},
{
"epoch": 0.97,
"grad_norm": 0.9564641118049622,
"learning_rate": 2.8437012055665403e-08,
"loss": 0.6325,
"step": 15264
},
{
"epoch": 0.97,
"grad_norm": 0.8980580568313599,
"learning_rate": 2.832784739369987e-08,
"loss": 0.5999,
"step": 15265
},
{
"epoch": 0.97,
"grad_norm": 0.8257995843887329,
"learning_rate": 2.8218892071553705e-08,
"loss": 0.5208,
"step": 15266
},
{
"epoch": 0.97,
"grad_norm": 0.9016112685203552,
"learning_rate": 2.8110146093814906e-08,
"loss": 0.555,
"step": 15267
},
{
"epoch": 0.97,
"grad_norm": 0.8790071606636047,
"learning_rate": 2.8001609465061474e-08,
"loss": 0.5744,
"step": 15268
},
{
"epoch": 0.97,
"grad_norm": 0.8602889180183411,
"learning_rate": 2.7893282189863647e-08,
"loss": 0.5734,
"step": 15269
},
{
"epoch": 0.97,
"grad_norm": 0.8754189014434814,
"learning_rate": 2.7785164272783327e-08,
"loss": 0.5627,
"step": 15270
},
{
"epoch": 0.97,
"grad_norm": 0.9688771367073059,
"learning_rate": 2.7677255718372986e-08,
"loss": 0.5618,
"step": 15271
},
{
"epoch": 0.97,
"grad_norm": 0.8828610181808472,
"learning_rate": 2.7569556531175657e-08,
"loss": 0.5808,
"step": 15272
},
{
"epoch": 0.97,
"grad_norm": 0.9202200174331665,
"learning_rate": 2.7462066715726045e-08,
"loss": 0.6168,
"step": 15273
},
{
"epoch": 0.97,
"grad_norm": 0.9173966646194458,
"learning_rate": 2.7354786276551083e-08,
"loss": 0.6054,
"step": 15274
},
{
"epoch": 0.97,
"grad_norm": 0.9728456139564514,
"learning_rate": 2.7247715218167714e-08,
"loss": 0.6074,
"step": 15275
},
{
"epoch": 0.97,
"grad_norm": 0.905205249786377,
"learning_rate": 2.7140853545083447e-08,
"loss": 0.5679,
"step": 15276
},
{
"epoch": 0.97,
"grad_norm": 0.9280872344970703,
"learning_rate": 2.703420126179912e-08,
"loss": 0.5779,
"step": 15277
},
{
"epoch": 0.97,
"grad_norm": 0.9241009950637817,
"learning_rate": 2.692775837280448e-08,
"loss": 0.6027,
"step": 15278
},
{
"epoch": 0.97,
"grad_norm": 0.8840800523757935,
"learning_rate": 2.682152488258205e-08,
"loss": 0.5911,
"step": 15279
},
{
"epoch": 0.97,
"grad_norm": 0.8568680286407471,
"learning_rate": 2.671550079560492e-08,
"loss": 0.5292,
"step": 15280
},
{
"epoch": 0.97,
"grad_norm": 0.8979047536849976,
"learning_rate": 2.6609686116337296e-08,
"loss": 0.5474,
"step": 15281
},
{
"epoch": 0.97,
"grad_norm": 0.9321437478065491,
"learning_rate": 2.6504080849234504e-08,
"loss": 0.6161,
"step": 15282
},
{
"epoch": 0.97,
"grad_norm": 0.8534190654754639,
"learning_rate": 2.6398684998742986e-08,
"loss": 0.5428,
"step": 15283
},
{
"epoch": 0.97,
"grad_norm": 0.855660080909729,
"learning_rate": 2.629349856930141e-08,
"loss": 0.5282,
"step": 15284
},
{
"epoch": 0.97,
"grad_norm": 0.9074161052703857,
"learning_rate": 2.6188521565338466e-08,
"loss": 0.5735,
"step": 15285
},
{
"epoch": 0.97,
"grad_norm": 0.8709417581558228,
"learning_rate": 2.608375399127394e-08,
"loss": 0.558,
"step": 15286
},
{
"epoch": 0.97,
"grad_norm": 0.8391317129135132,
"learning_rate": 2.5979195851519313e-08,
"loss": 0.5302,
"step": 15287
},
{
"epoch": 0.97,
"grad_norm": 0.9150146842002869,
"learning_rate": 2.5874847150477722e-08,
"loss": 0.5882,
"step": 15288
},
{
"epoch": 0.97,
"grad_norm": 0.8675903081893921,
"learning_rate": 2.5770707892542878e-08,
"loss": 0.5531,
"step": 15289
},
{
"epoch": 0.97,
"grad_norm": 0.9216609597206116,
"learning_rate": 2.5666778082099052e-08,
"loss": 0.5489,
"step": 15290
},
{
"epoch": 0.97,
"grad_norm": 0.9866342544555664,
"learning_rate": 2.5563057723522742e-08,
"loss": 0.5831,
"step": 15291
},
{
"epoch": 0.97,
"grad_norm": 0.9153217673301697,
"learning_rate": 2.5459546821181014e-08,
"loss": 0.5968,
"step": 15292
},
{
"epoch": 0.97,
"grad_norm": 0.8692642450332642,
"learning_rate": 2.5356245379433154e-08,
"loss": 0.5611,
"step": 15293
},
{
"epoch": 0.97,
"grad_norm": 0.8365652561187744,
"learning_rate": 2.525315340262846e-08,
"loss": 0.5344,
"step": 15294
},
{
"epoch": 0.97,
"grad_norm": 0.8992009162902832,
"learning_rate": 2.51502708951068e-08,
"loss": 0.6268,
"step": 15295
},
{
"epoch": 0.97,
"grad_norm": 0.9119232892990112,
"learning_rate": 2.5047597861201368e-08,
"loss": 0.5838,
"step": 15296
},
{
"epoch": 0.97,
"grad_norm": 0.9377126097679138,
"learning_rate": 2.494513430523482e-08,
"loss": 0.5693,
"step": 15297
},
{
"epoch": 0.97,
"grad_norm": 0.9767472147941589,
"learning_rate": 2.4842880231522038e-08,
"loss": 0.5736,
"step": 15298
},
{
"epoch": 0.97,
"grad_norm": 0.8412918448448181,
"learning_rate": 2.474083564436791e-08,
"loss": 0.5326,
"step": 15299
},
{
"epoch": 0.97,
"grad_norm": 0.8873780965805054,
"learning_rate": 2.4639000548070114e-08,
"loss": 0.6094,
"step": 15300
},
{
"epoch": 0.97,
"grad_norm": 0.8979631066322327,
"learning_rate": 2.4537374946915215e-08,
"loss": 0.5258,
"step": 15301
},
{
"epoch": 0.97,
"grad_norm": 0.8989670872688293,
"learning_rate": 2.4435958845183684e-08,
"loss": 0.5554,
"step": 15302
},
{
"epoch": 0.97,
"grad_norm": 0.8940199017524719,
"learning_rate": 2.4334752247145433e-08,
"loss": 0.5619,
"step": 15303
},
{
"epoch": 0.97,
"grad_norm": 0.8079856038093567,
"learning_rate": 2.4233755157060944e-08,
"loss": 0.5733,
"step": 15304
},
{
"epoch": 0.97,
"grad_norm": 0.9142255187034607,
"learning_rate": 2.413296757918404e-08,
"loss": 0.5756,
"step": 15305
},
{
"epoch": 0.97,
"grad_norm": 0.8849722146987915,
"learning_rate": 2.4032389517757993e-08,
"loss": 0.5724,
"step": 15306
},
{
"epoch": 0.97,
"grad_norm": 0.8250989317893982,
"learning_rate": 2.3932020977017745e-08,
"loss": 0.5244,
"step": 15307
},
{
"epoch": 0.97,
"grad_norm": 0.8586926460266113,
"learning_rate": 2.3831861961189917e-08,
"loss": 0.5474,
"step": 15308
},
{
"epoch": 0.97,
"grad_norm": 0.8741357326507568,
"learning_rate": 2.3731912474491137e-08,
"loss": 0.5359,
"step": 15309
},
{
"epoch": 0.97,
"grad_norm": 0.8332794308662415,
"learning_rate": 2.3632172521130815e-08,
"loss": 0.51,
"step": 15310
},
{
"epoch": 0.97,
"grad_norm": 0.8357459902763367,
"learning_rate": 2.3532642105307813e-08,
"loss": 0.5387,
"step": 15311
},
{
"epoch": 0.97,
"grad_norm": 0.900471568107605,
"learning_rate": 2.3433321231213778e-08,
"loss": 0.5677,
"step": 15312
},
{
"epoch": 0.97,
"grad_norm": 0.9303227066993713,
"learning_rate": 2.3334209903029815e-08,
"loss": 0.5955,
"step": 15313
},
{
"epoch": 0.97,
"grad_norm": 0.8947436809539795,
"learning_rate": 2.3235308124930357e-08,
"loss": 0.5752,
"step": 15314
},
{
"epoch": 0.97,
"grad_norm": 0.9085913896560669,
"learning_rate": 2.3136615901078742e-08,
"loss": 0.5911,
"step": 15315
},
{
"epoch": 0.97,
"grad_norm": 0.8889240026473999,
"learning_rate": 2.303813323563109e-08,
"loss": 0.5651,
"step": 15316
},
{
"epoch": 0.97,
"grad_norm": 0.9324416518211365,
"learning_rate": 2.2939860132734084e-08,
"loss": 0.5499,
"step": 15317
},
{
"epoch": 0.97,
"grad_norm": 0.8463728427886963,
"learning_rate": 2.2841796596525522e-08,
"loss": 0.5775,
"step": 15318
},
{
"epoch": 0.97,
"grad_norm": 0.9454851150512695,
"learning_rate": 2.2743942631134886e-08,
"loss": 0.5532,
"step": 15319
},
{
"epoch": 0.97,
"grad_norm": 0.9939208030700684,
"learning_rate": 2.264629824068165e-08,
"loss": 0.661,
"step": 15320
},
{
"epoch": 0.97,
"grad_norm": 0.952218234539032,
"learning_rate": 2.2548863429278645e-08,
"loss": 0.6229,
"step": 15321
},
{
"epoch": 0.97,
"grad_norm": 0.8898659348487854,
"learning_rate": 2.2451638201027026e-08,
"loss": 0.5843,
"step": 15322
},
{
"epoch": 0.97,
"grad_norm": 0.9616308212280273,
"learning_rate": 2.2354622560021854e-08,
"loss": 0.5664,
"step": 15323
},
{
"epoch": 0.97,
"grad_norm": 0.8662042021751404,
"learning_rate": 2.2257816510347086e-08,
"loss": 0.55,
"step": 15324
},
{
"epoch": 0.97,
"grad_norm": 0.9076823592185974,
"learning_rate": 2.2161220056079457e-08,
"loss": 0.5385,
"step": 15325
},
{
"epoch": 0.97,
"grad_norm": 0.9138240218162537,
"learning_rate": 2.2064833201286827e-08,
"loss": 0.5659,
"step": 15326
},
{
"epoch": 0.97,
"grad_norm": 0.8747329115867615,
"learning_rate": 2.1968655950026508e-08,
"loss": 0.6079,
"step": 15327
},
{
"epoch": 0.97,
"grad_norm": 0.8825821876525879,
"learning_rate": 2.187268830634859e-08,
"loss": 0.5695,
"step": 15328
},
{
"epoch": 0.97,
"grad_norm": 0.8999965190887451,
"learning_rate": 2.1776930274294283e-08,
"loss": 0.5606,
"step": 15329
},
{
"epoch": 0.97,
"grad_norm": 0.8908900022506714,
"learning_rate": 2.1681381857895923e-08,
"loss": 0.5693,
"step": 15330
},
{
"epoch": 0.97,
"grad_norm": 0.87883061170578,
"learning_rate": 2.1586043061175842e-08,
"loss": 0.5597,
"step": 15331
},
{
"epoch": 0.97,
"grad_norm": 0.8488723635673523,
"learning_rate": 2.1490913888149166e-08,
"loss": 0.6041,
"step": 15332
},
{
"epoch": 0.97,
"grad_norm": 0.9183140993118286,
"learning_rate": 2.139599434282047e-08,
"loss": 0.5741,
"step": 15333
},
{
"epoch": 0.97,
"grad_norm": 0.9319660663604736,
"learning_rate": 2.130128442918766e-08,
"loss": 0.641,
"step": 15334
},
{
"epoch": 0.97,
"grad_norm": 0.9717698097229004,
"learning_rate": 2.1206784151238113e-08,
"loss": 0.5749,
"step": 15335
},
{
"epoch": 0.97,
"grad_norm": 0.8720336556434631,
"learning_rate": 2.111249351295086e-08,
"loss": 0.6203,
"step": 15336
},
{
"epoch": 0.97,
"grad_norm": 0.8387833833694458,
"learning_rate": 2.1018412518296617e-08,
"loss": 0.5874,
"step": 15337
},
{
"epoch": 0.97,
"grad_norm": 0.8112475872039795,
"learning_rate": 2.0924541171235545e-08,
"loss": 0.5653,
"step": 15338
},
{
"epoch": 0.97,
"grad_norm": 0.9344534277915955,
"learning_rate": 2.083087947572171e-08,
"loss": 0.567,
"step": 15339
},
{
"epoch": 0.97,
"grad_norm": 0.9421919584274292,
"learning_rate": 2.073742743569862e-08,
"loss": 0.5728,
"step": 15340
},
{
"epoch": 0.97,
"grad_norm": 0.9264227151870728,
"learning_rate": 2.0644185055100352e-08,
"loss": 0.6343,
"step": 15341
},
{
"epoch": 0.97,
"grad_norm": 0.8339887857437134,
"learning_rate": 2.0551152337853208e-08,
"loss": 0.5588,
"step": 15342
},
{
"epoch": 0.97,
"grad_norm": 0.9266855120658875,
"learning_rate": 2.0458329287875168e-08,
"loss": 0.5816,
"step": 15343
},
{
"epoch": 0.97,
"grad_norm": 0.9328429698944092,
"learning_rate": 2.0365715909074213e-08,
"loss": 0.5793,
"step": 15344
},
{
"epoch": 0.97,
"grad_norm": 0.8851380944252014,
"learning_rate": 2.027331220535056e-08,
"loss": 0.6006,
"step": 15345
},
{
"epoch": 0.97,
"grad_norm": 0.9010560512542725,
"learning_rate": 2.018111818059387e-08,
"loss": 0.4876,
"step": 15346
},
{
"epoch": 0.97,
"grad_norm": 0.9649263620376587,
"learning_rate": 2.008913383868716e-08,
"loss": 0.6252,
"step": 15347
},
{
"epoch": 0.97,
"grad_norm": 0.9451420903205872,
"learning_rate": 1.999735918350343e-08,
"loss": 0.5907,
"step": 15348
},
{
"epoch": 0.97,
"grad_norm": 0.9090909361839294,
"learning_rate": 1.990579421890626e-08,
"loss": 0.5909,
"step": 15349
},
{
"epoch": 0.97,
"grad_norm": 0.8864248991012573,
"learning_rate": 1.9814438948751458e-08,
"loss": 0.5775,
"step": 15350
},
{
"epoch": 0.97,
"grad_norm": 0.8502189517021179,
"learning_rate": 1.9723293376886497e-08,
"loss": 0.5513,
"step": 15351
},
{
"epoch": 0.97,
"grad_norm": 0.8676384687423706,
"learning_rate": 1.963235750714776e-08,
"loss": 0.6178,
"step": 15352
},
{
"epoch": 0.97,
"grad_norm": 0.9123603701591492,
"learning_rate": 1.9541631343365507e-08,
"loss": 0.5994,
"step": 15353
},
{
"epoch": 0.97,
"grad_norm": 1.0008543729782104,
"learning_rate": 1.9451114889359468e-08,
"loss": 0.6402,
"step": 15354
},
{
"epoch": 0.97,
"grad_norm": 0.925025999546051,
"learning_rate": 1.936080814894048e-08,
"loss": 0.5693,
"step": 15355
},
{
"epoch": 0.97,
"grad_norm": 0.9011825323104858,
"learning_rate": 1.9270711125912167e-08,
"loss": 0.5558,
"step": 15356
},
{
"epoch": 0.97,
"grad_norm": 0.8734168410301208,
"learning_rate": 1.9180823824067053e-08,
"loss": 0.5738,
"step": 15357
},
{
"epoch": 0.97,
"grad_norm": 0.858608067035675,
"learning_rate": 1.909114624719044e-08,
"loss": 0.578,
"step": 15358
},
{
"epoch": 0.97,
"grad_norm": 0.8652524352073669,
"learning_rate": 1.90016783990582e-08,
"loss": 0.5556,
"step": 15359
},
{
"epoch": 0.97,
"grad_norm": 0.9070523977279663,
"learning_rate": 1.891242028343787e-08,
"loss": 0.6098,
"step": 15360
},
{
"epoch": 0.97,
"grad_norm": 0.8430723547935486,
"learning_rate": 1.8823371904087563e-08,
"loss": 0.5671,
"step": 15361
},
{
"epoch": 0.97,
"grad_norm": 0.8682308197021484,
"learning_rate": 1.8734533264757047e-08,
"loss": 0.5375,
"step": 15362
},
{
"epoch": 0.97,
"grad_norm": 0.9168040156364441,
"learning_rate": 1.864590436918612e-08,
"loss": 0.611,
"step": 15363
},
{
"epoch": 0.97,
"grad_norm": 0.9281341433525085,
"learning_rate": 1.8557485221107897e-08,
"loss": 0.5621,
"step": 15364
},
{
"epoch": 0.97,
"grad_norm": 0.9351321458816528,
"learning_rate": 1.8469275824244958e-08,
"loss": 0.5595,
"step": 15365
},
{
"epoch": 0.97,
"grad_norm": 0.9267570376396179,
"learning_rate": 1.8381276182311004e-08,
"loss": 0.5362,
"step": 15366
},
{
"epoch": 0.97,
"grad_norm": 0.9369710087776184,
"learning_rate": 1.8293486299011398e-08,
"loss": 0.5719,
"step": 15367
},
{
"epoch": 0.97,
"grad_norm": 0.9404371380805969,
"learning_rate": 1.8205906178043186e-08,
"loss": 0.5835,
"step": 15368
},
{
"epoch": 0.97,
"grad_norm": 0.8816442489624023,
"learning_rate": 1.811853582309453e-08,
"loss": 0.6092,
"step": 15369
},
{
"epoch": 0.97,
"grad_norm": 0.8771417737007141,
"learning_rate": 1.803137523784304e-08,
"loss": 0.5125,
"step": 15370
},
{
"epoch": 0.97,
"grad_norm": 0.8539003133773804,
"learning_rate": 1.7944424425959116e-08,
"loss": 0.5784,
"step": 15371
},
{
"epoch": 0.97,
"grad_norm": 0.876171886920929,
"learning_rate": 1.7857683391104273e-08,
"loss": 0.5423,
"step": 15372
},
{
"epoch": 0.97,
"grad_norm": 0.8787450790405273,
"learning_rate": 1.7771152136931147e-08,
"loss": 0.5901,
"step": 15373
},
{
"epoch": 0.97,
"grad_norm": 0.9184008836746216,
"learning_rate": 1.7684830667082377e-08,
"loss": 0.5802,
"step": 15374
},
{
"epoch": 0.97,
"grad_norm": 0.887069046497345,
"learning_rate": 1.759871898519394e-08,
"loss": 0.5638,
"step": 15375
},
{
"epoch": 0.97,
"grad_norm": 0.8621271848678589,
"learning_rate": 1.7512817094890167e-08,
"loss": 0.5408,
"step": 15376
},
{
"epoch": 0.97,
"grad_norm": 0.8775637745857239,
"learning_rate": 1.742712499978927e-08,
"loss": 0.5448,
"step": 15377
},
{
"epoch": 0.97,
"grad_norm": 0.9085080027580261,
"learning_rate": 1.734164270349892e-08,
"loss": 0.5373,
"step": 15378
},
{
"epoch": 0.97,
"grad_norm": 0.8680553436279297,
"learning_rate": 1.7256370209618458e-08,
"loss": 0.6011,
"step": 15379
},
{
"epoch": 0.97,
"grad_norm": 0.9011398553848267,
"learning_rate": 1.71713075217389e-08,
"loss": 0.5927,
"step": 15380
},
{
"epoch": 0.97,
"grad_norm": 0.9100791215896606,
"learning_rate": 1.7086454643441273e-08,
"loss": 0.592,
"step": 15381
},
{
"epoch": 0.97,
"grad_norm": 0.8486478924751282,
"learning_rate": 1.7001811578298832e-08,
"loss": 0.5193,
"step": 15382
},
{
"epoch": 0.97,
"grad_norm": 0.8972152471542358,
"learning_rate": 1.6917378329875946e-08,
"loss": 0.5594,
"step": 15383
},
{
"epoch": 0.97,
"grad_norm": 0.8258572220802307,
"learning_rate": 1.6833154901726988e-08,
"loss": 0.5933,
"step": 15384
},
{
"epoch": 0.97,
"grad_norm": 0.9576346278190613,
"learning_rate": 1.6749141297398574e-08,
"loss": 0.5544,
"step": 15385
},
{
"epoch": 0.97,
"grad_norm": 0.8871638178825378,
"learning_rate": 1.6665337520428427e-08,
"loss": 0.5285,
"step": 15386
},
{
"epoch": 0.97,
"grad_norm": 0.8549116253852844,
"learning_rate": 1.658174357434483e-08,
"loss": 0.5918,
"step": 15387
},
{
"epoch": 0.97,
"grad_norm": 0.9582047462463379,
"learning_rate": 1.649835946266831e-08,
"loss": 0.6273,
"step": 15388
},
{
"epoch": 0.97,
"grad_norm": 0.8410069346427917,
"learning_rate": 1.6415185188909944e-08,
"loss": 0.5405,
"step": 15389
},
{
"epoch": 0.98,
"grad_norm": 0.8835951685905457,
"learning_rate": 1.6332220756570815e-08,
"loss": 0.5562,
"step": 15390
},
{
"epoch": 0.98,
"grad_norm": 0.8853712677955627,
"learning_rate": 1.6249466169145354e-08,
"loss": 0.6191,
"step": 15391
},
{
"epoch": 0.98,
"grad_norm": 0.887049674987793,
"learning_rate": 1.6166921430118e-08,
"loss": 0.5563,
"step": 15392
},
{
"epoch": 0.98,
"grad_norm": 0.9243970513343811,
"learning_rate": 1.608458654296319e-08,
"loss": 0.5632,
"step": 15393
},
{
"epoch": 0.98,
"grad_norm": 0.9056985378265381,
"learning_rate": 1.600246151114926e-08,
"loss": 0.5555,
"step": 15394
},
{
"epoch": 0.98,
"grad_norm": 0.8855133056640625,
"learning_rate": 1.5920546338133447e-08,
"loss": 0.5699,
"step": 15395
},
{
"epoch": 0.98,
"grad_norm": 0.8974730968475342,
"learning_rate": 1.5838841027365215e-08,
"loss": 0.5759,
"step": 15396
},
{
"epoch": 0.98,
"grad_norm": 0.876063883304596,
"learning_rate": 1.5757345582285144e-08,
"loss": 0.5996,
"step": 15397
},
{
"epoch": 0.98,
"grad_norm": 0.8951111435890198,
"learning_rate": 1.5676060006323267e-08,
"loss": 0.6397,
"step": 15398
},
{
"epoch": 0.98,
"grad_norm": 0.9184896349906921,
"learning_rate": 1.559498430290407e-08,
"loss": 0.5522,
"step": 15399
},
{
"epoch": 0.98,
"grad_norm": 0.9113056063652039,
"learning_rate": 1.5514118475440378e-08,
"loss": 0.5542,
"step": 15400
},
{
"epoch": 0.98,
"grad_norm": 0.8028354048728943,
"learning_rate": 1.5433462527337793e-08,
"loss": 0.5046,
"step": 15401
},
{
"epoch": 0.98,
"grad_norm": 0.8966811299324036,
"learning_rate": 1.5353016461991387e-08,
"loss": 0.6253,
"step": 15402
},
{
"epoch": 0.98,
"grad_norm": 0.8664458394050598,
"learning_rate": 1.5272780282789556e-08,
"loss": 0.6081,
"step": 15403
},
{
"epoch": 0.98,
"grad_norm": 0.8263579607009888,
"learning_rate": 1.5192753993110155e-08,
"loss": 0.535,
"step": 15404
},
{
"epoch": 0.98,
"grad_norm": 0.908085286617279,
"learning_rate": 1.5112937596323263e-08,
"loss": 0.6077,
"step": 15405
},
{
"epoch": 0.98,
"grad_norm": 0.895283579826355,
"learning_rate": 1.5033331095788973e-08,
"loss": 0.5622,
"step": 15406
},
{
"epoch": 0.98,
"grad_norm": 0.84937584400177,
"learning_rate": 1.4953934494860155e-08,
"loss": 0.4815,
"step": 15407
},
{
"epoch": 0.98,
"grad_norm": 0.8702456951141357,
"learning_rate": 1.4874747796879142e-08,
"loss": 0.5375,
"step": 15408
},
{
"epoch": 0.98,
"grad_norm": 0.9346665740013123,
"learning_rate": 1.4795771005181036e-08,
"loss": 0.633,
"step": 15409
},
{
"epoch": 0.98,
"grad_norm": 0.8707761764526367,
"learning_rate": 1.4717004123090406e-08,
"loss": 0.5525,
"step": 15410
},
{
"epoch": 0.98,
"grad_norm": 0.9167184829711914,
"learning_rate": 1.463844715392404e-08,
"loss": 0.6256,
"step": 15411
},
{
"epoch": 0.98,
"grad_norm": 0.8158385157585144,
"learning_rate": 1.4560100100989849e-08,
"loss": 0.514,
"step": 15412
},
{
"epoch": 0.98,
"grad_norm": 0.8651106953620911,
"learning_rate": 1.448196296758686e-08,
"loss": 0.5446,
"step": 15413
},
{
"epoch": 0.98,
"grad_norm": 0.8702985644340515,
"learning_rate": 1.4404035757005219e-08,
"loss": 0.5671,
"step": 15414
},
{
"epoch": 0.98,
"grad_norm": 0.9393275380134583,
"learning_rate": 1.4326318472525635e-08,
"loss": 0.567,
"step": 15415
},
{
"epoch": 0.98,
"grad_norm": 0.9015846252441406,
"learning_rate": 1.4248811117421046e-08,
"loss": 0.5923,
"step": 15416
},
{
"epoch": 0.98,
"grad_norm": 0.9426272511482239,
"learning_rate": 1.4171513694954953e-08,
"loss": 0.5628,
"step": 15417
},
{
"epoch": 0.98,
"grad_norm": 0.9212367534637451,
"learning_rate": 1.4094426208381972e-08,
"loss": 0.6307,
"step": 15418
},
{
"epoch": 0.98,
"grad_norm": 0.9438113570213318,
"learning_rate": 1.4017548660947844e-08,
"loss": 0.5934,
"step": 15419
},
{
"epoch": 0.98,
"grad_norm": 0.8640725016593933,
"learning_rate": 1.3940881055889976e-08,
"loss": 0.594,
"step": 15420
},
{
"epoch": 0.98,
"grad_norm": 0.9187299609184265,
"learning_rate": 1.3864423396436344e-08,
"loss": 0.55,
"step": 15421
},
{
"epoch": 0.98,
"grad_norm": 0.8524268865585327,
"learning_rate": 1.3788175685806594e-08,
"loss": 0.5912,
"step": 15422
},
{
"epoch": 0.98,
"grad_norm": 0.9235708713531494,
"learning_rate": 1.3712137927210377e-08,
"loss": 0.5946,
"step": 15423
},
{
"epoch": 0.98,
"grad_norm": 0.911343514919281,
"learning_rate": 1.3636310123850694e-08,
"loss": 0.6099,
"step": 15424
},
{
"epoch": 0.98,
"grad_norm": 0.8266077637672424,
"learning_rate": 1.3560692278919429e-08,
"loss": 0.554,
"step": 15425
},
{
"epoch": 0.98,
"grad_norm": 0.8780014514923096,
"learning_rate": 1.3485284395600707e-08,
"loss": 0.5294,
"step": 15426
},
{
"epoch": 0.98,
"grad_norm": 0.9280437231063843,
"learning_rate": 1.3410086477069761e-08,
"loss": 0.6171,
"step": 15427
},
{
"epoch": 0.98,
"grad_norm": 0.8806030750274658,
"learning_rate": 1.333509852649295e-08,
"loss": 0.551,
"step": 15428
},
{
"epoch": 0.98,
"grad_norm": 0.8715260028839111,
"learning_rate": 1.3260320547028305e-08,
"loss": 0.5821,
"step": 15429
},
{
"epoch": 0.98,
"grad_norm": 0.8022364377975464,
"learning_rate": 1.3185752541823304e-08,
"loss": 0.5608,
"step": 15430
},
{
"epoch": 0.98,
"grad_norm": 0.9580459594726562,
"learning_rate": 1.3111394514018772e-08,
"loss": 0.5416,
"step": 15431
},
{
"epoch": 0.98,
"grad_norm": 0.8772706985473633,
"learning_rate": 1.3037246466745535e-08,
"loss": 0.5591,
"step": 15432
},
{
"epoch": 0.98,
"grad_norm": 0.8983436822891235,
"learning_rate": 1.2963308403124985e-08,
"loss": 0.5797,
"step": 15433
},
{
"epoch": 0.98,
"grad_norm": 0.9700096249580383,
"learning_rate": 1.2889580326271301e-08,
"loss": 0.5816,
"step": 15434
},
{
"epoch": 0.98,
"grad_norm": 0.8904829621315002,
"learning_rate": 1.2816062239288107e-08,
"loss": 0.5378,
"step": 15435
},
{
"epoch": 0.98,
"grad_norm": 0.9526095390319824,
"learning_rate": 1.2742754145271264e-08,
"loss": 0.5637,
"step": 15436
},
{
"epoch": 0.98,
"grad_norm": 0.9104148745536804,
"learning_rate": 1.2669656047308299e-08,
"loss": 0.5773,
"step": 15437
},
{
"epoch": 0.98,
"grad_norm": 0.9310768246650696,
"learning_rate": 1.2596767948476196e-08,
"loss": 0.603,
"step": 15438
},
{
"epoch": 0.98,
"grad_norm": 0.9144603610038757,
"learning_rate": 1.2524089851844168e-08,
"loss": 0.5422,
"step": 15439
},
{
"epoch": 0.98,
"grad_norm": 0.856395959854126,
"learning_rate": 1.2451621760472544e-08,
"loss": 0.5526,
"step": 15440
},
{
"epoch": 0.98,
"grad_norm": 0.9461926817893982,
"learning_rate": 1.237936367741277e-08,
"loss": 0.5741,
"step": 15441
},
{
"epoch": 0.98,
"grad_norm": 0.9388793706893921,
"learning_rate": 1.2307315605707416e-08,
"loss": 0.5818,
"step": 15442
},
{
"epoch": 0.98,
"grad_norm": 0.9289106726646423,
"learning_rate": 1.2235477548390162e-08,
"loss": 0.6053,
"step": 15443
},
{
"epoch": 0.98,
"grad_norm": 0.821549654006958,
"learning_rate": 1.2163849508485259e-08,
"loss": 0.5101,
"step": 15444
},
{
"epoch": 0.98,
"grad_norm": 0.8986890316009521,
"learning_rate": 1.2092431489009738e-08,
"loss": 0.5866,
"step": 15445
},
{
"epoch": 0.98,
"grad_norm": 0.8315547704696655,
"learning_rate": 1.202122349297008e-08,
"loss": 0.5505,
"step": 15446
},
{
"epoch": 0.98,
"grad_norm": 0.9459112882614136,
"learning_rate": 1.1950225523365e-08,
"loss": 0.6082,
"step": 15447
},
{
"epoch": 0.98,
"grad_norm": 0.8490333557128906,
"learning_rate": 1.1879437583183217e-08,
"loss": 0.5561,
"step": 15448
},
{
"epoch": 0.98,
"grad_norm": 0.8695975542068481,
"learning_rate": 1.1808859675406236e-08,
"loss": 0.5987,
"step": 15449
},
{
"epoch": 0.98,
"grad_norm": 0.9123320579528809,
"learning_rate": 1.173849180300557e-08,
"loss": 0.5599,
"step": 15450
},
{
"epoch": 0.98,
"grad_norm": 0.9430971145629883,
"learning_rate": 1.16683339689444e-08,
"loss": 0.6193,
"step": 15451
},
{
"epoch": 0.98,
"grad_norm": 0.871895968914032,
"learning_rate": 1.1598386176175924e-08,
"loss": 0.567,
"step": 15452
},
{
"epoch": 0.98,
"grad_norm": 0.852607786655426,
"learning_rate": 1.1528648427646671e-08,
"loss": 0.5368,
"step": 15453
},
{
"epoch": 0.98,
"grad_norm": 0.887416422367096,
"learning_rate": 1.1459120726292072e-08,
"loss": 0.5469,
"step": 15454
},
{
"epoch": 0.98,
"grad_norm": 0.9012024998664856,
"learning_rate": 1.1389803075039785e-08,
"loss": 0.5667,
"step": 15455
},
{
"epoch": 0.98,
"grad_norm": 0.8867619633674622,
"learning_rate": 1.1320695476809141e-08,
"loss": 0.6016,
"step": 15456
},
{
"epoch": 0.98,
"grad_norm": 0.896775484085083,
"learning_rate": 1.1251797934509478e-08,
"loss": 0.5845,
"step": 15457
},
{
"epoch": 0.98,
"grad_norm": 0.9201370477676392,
"learning_rate": 1.1183110451042368e-08,
"loss": 0.5236,
"step": 15458
},
{
"epoch": 0.98,
"grad_norm": 0.877086877822876,
"learning_rate": 1.1114633029299382e-08,
"loss": 0.5334,
"step": 15459
},
{
"epoch": 0.98,
"grad_norm": 0.8296651840209961,
"learning_rate": 1.1046365672163772e-08,
"loss": 0.5453,
"step": 15460
},
{
"epoch": 0.98,
"grad_norm": 0.8853237628936768,
"learning_rate": 1.0978308382511016e-08,
"loss": 0.5937,
"step": 15461
},
{
"epoch": 0.98,
"grad_norm": 0.9216740131378174,
"learning_rate": 1.0910461163206043e-08,
"loss": 0.5611,
"step": 15462
},
{
"epoch": 0.98,
"grad_norm": 0.9715553522109985,
"learning_rate": 1.0842824017105458e-08,
"loss": 0.5791,
"step": 15463
},
{
"epoch": 0.98,
"grad_norm": 0.8290508985519409,
"learning_rate": 1.0775396947057537e-08,
"loss": 0.5391,
"step": 15464
},
{
"epoch": 0.98,
"grad_norm": 0.8305854201316833,
"learning_rate": 1.0708179955901677e-08,
"loss": 0.5389,
"step": 15465
},
{
"epoch": 0.98,
"grad_norm": 0.9410961866378784,
"learning_rate": 1.0641173046467833e-08,
"loss": 0.5963,
"step": 15466
},
{
"epoch": 0.98,
"grad_norm": 0.9235133528709412,
"learning_rate": 1.0574376221577642e-08,
"loss": 0.5421,
"step": 15467
},
{
"epoch": 0.98,
"grad_norm": 0.8532764315605164,
"learning_rate": 1.0507789484043295e-08,
"loss": 0.4974,
"step": 15468
},
{
"epoch": 0.98,
"grad_norm": 0.874849796295166,
"learning_rate": 1.0441412836668663e-08,
"loss": 0.5337,
"step": 15469
},
{
"epoch": 0.98,
"grad_norm": 0.8603571057319641,
"learning_rate": 1.037524628224873e-08,
"loss": 0.5523,
"step": 15470
},
{
"epoch": 0.98,
"grad_norm": 0.871017575263977,
"learning_rate": 1.0309289823569601e-08,
"loss": 0.5627,
"step": 15471
},
{
"epoch": 0.98,
"grad_norm": 0.9409274458885193,
"learning_rate": 1.02435434634085e-08,
"loss": 0.5904,
"step": 15472
},
{
"epoch": 0.98,
"grad_norm": 1.0156452655792236,
"learning_rate": 1.0178007204533768e-08,
"loss": 0.5892,
"step": 15473
},
{
"epoch": 0.98,
"grad_norm": 0.8976706266403198,
"learning_rate": 1.0112681049704865e-08,
"loss": 0.5797,
"step": 15474
},
{
"epoch": 0.98,
"grad_norm": 0.8561591506004333,
"learning_rate": 1.004756500167181e-08,
"loss": 0.5468,
"step": 15475
},
{
"epoch": 0.98,
"grad_norm": 0.8820354342460632,
"learning_rate": 9.982659063177413e-09,
"loss": 0.57,
"step": 15476
},
{
"epoch": 0.98,
"grad_norm": 0.8407560586929321,
"learning_rate": 9.917963236954487e-09,
"loss": 0.5503,
"step": 15477
},
{
"epoch": 0.98,
"grad_norm": 0.9282391667366028,
"learning_rate": 9.853477525726962e-09,
"loss": 0.5349,
"step": 15478
},
{
"epoch": 0.98,
"grad_norm": 0.8840251564979553,
"learning_rate": 9.789201932209335e-09,
"loss": 0.6081,
"step": 15479
},
{
"epoch": 0.98,
"grad_norm": 0.9751169681549072,
"learning_rate": 9.725136459109441e-09,
"loss": 0.6371,
"step": 15480
},
{
"epoch": 0.98,
"grad_norm": 0.8782668709754944,
"learning_rate": 9.66128110912401e-09,
"loss": 0.5463,
"step": 15481
},
{
"epoch": 0.98,
"grad_norm": 0.7854354381561279,
"learning_rate": 9.597635884941447e-09,
"loss": 0.5376,
"step": 15482
},
{
"epoch": 0.98,
"grad_norm": 0.847726047039032,
"learning_rate": 9.534200789242388e-09,
"loss": 0.5804,
"step": 15483
},
{
"epoch": 0.98,
"grad_norm": 0.8897963166236877,
"learning_rate": 9.470975824698025e-09,
"loss": 0.6062,
"step": 15484
},
{
"epoch": 0.98,
"grad_norm": 0.930458128452301,
"learning_rate": 9.407960993969567e-09,
"loss": 0.6237,
"step": 15485
},
{
"epoch": 0.98,
"grad_norm": 0.8038657903671265,
"learning_rate": 9.345156299711e-09,
"loss": 0.5404,
"step": 15486
},
{
"epoch": 0.98,
"grad_norm": 0.8747665286064148,
"learning_rate": 9.282561744566321e-09,
"loss": 0.551,
"step": 15487
},
{
"epoch": 0.98,
"grad_norm": 0.8687538504600525,
"learning_rate": 9.220177331172309e-09,
"loss": 0.5515,
"step": 15488
},
{
"epoch": 0.98,
"grad_norm": 0.8491008877754211,
"learning_rate": 9.158003062154642e-09,
"loss": 0.5227,
"step": 15489
},
{
"epoch": 0.98,
"grad_norm": 0.9173932671546936,
"learning_rate": 9.096038940131225e-09,
"loss": 0.6004,
"step": 15490
},
{
"epoch": 0.98,
"grad_norm": 0.817330539226532,
"learning_rate": 9.034284967711637e-09,
"loss": 0.5088,
"step": 15491
},
{
"epoch": 0.98,
"grad_norm": 0.8443012833595276,
"learning_rate": 8.972741147496023e-09,
"loss": 0.5755,
"step": 15492
},
{
"epoch": 0.98,
"grad_norm": 0.804356575012207,
"learning_rate": 8.911407482076196e-09,
"loss": 0.5244,
"step": 15493
},
{
"epoch": 0.98,
"grad_norm": 0.865323007106781,
"learning_rate": 8.85028397403398e-09,
"loss": 0.5702,
"step": 15494
},
{
"epoch": 0.98,
"grad_norm": 0.9057186245918274,
"learning_rate": 8.789370625943427e-09,
"loss": 0.5933,
"step": 15495
},
{
"epoch": 0.98,
"grad_norm": 0.9095432162284851,
"learning_rate": 8.728667440369153e-09,
"loss": 0.5645,
"step": 15496
},
{
"epoch": 0.98,
"grad_norm": 0.8884914517402649,
"learning_rate": 8.668174419867449e-09,
"loss": 0.6086,
"step": 15497
},
{
"epoch": 0.98,
"grad_norm": 0.9319071173667908,
"learning_rate": 8.60789156698516e-09,
"loss": 0.5349,
"step": 15498
},
{
"epoch": 0.98,
"grad_norm": 0.9238869547843933,
"learning_rate": 8.547818884260816e-09,
"loss": 0.5856,
"step": 15499
},
{
"epoch": 0.98,
"grad_norm": 0.8431046009063721,
"learning_rate": 8.48795637422406e-09,
"loss": 0.5275,
"step": 15500
},
{
"epoch": 0.98,
"grad_norm": 0.884147584438324,
"learning_rate": 8.428304039395096e-09,
"loss": 0.5912,
"step": 15501
},
{
"epoch": 0.98,
"grad_norm": 0.8942022919654846,
"learning_rate": 8.368861882285806e-09,
"loss": 0.5818,
"step": 15502
},
{
"epoch": 0.98,
"grad_norm": 0.8837722539901733,
"learning_rate": 8.309629905399186e-09,
"loss": 0.5234,
"step": 15503
},
{
"epoch": 0.98,
"grad_norm": 0.8658926486968994,
"learning_rate": 8.250608111229352e-09,
"loss": 0.599,
"step": 15504
},
{
"epoch": 0.98,
"grad_norm": 0.9732296466827393,
"learning_rate": 8.191796502260985e-09,
"loss": 0.6188,
"step": 15505
},
{
"epoch": 0.98,
"grad_norm": 0.9690650701522827,
"learning_rate": 8.13319508097099e-09,
"loss": 0.6361,
"step": 15506
},
{
"epoch": 0.98,
"grad_norm": 0.9391032457351685,
"learning_rate": 8.074803849827395e-09,
"loss": 0.5799,
"step": 15507
},
{
"epoch": 0.98,
"grad_norm": 0.8532130122184753,
"learning_rate": 8.016622811287123e-09,
"loss": 0.5294,
"step": 15508
},
{
"epoch": 0.98,
"grad_norm": 0.8348953127861023,
"learning_rate": 7.958651967801545e-09,
"loss": 0.5303,
"step": 15509
},
{
"epoch": 0.98,
"grad_norm": 0.865597128868103,
"learning_rate": 7.900891321810932e-09,
"loss": 0.5601,
"step": 15510
},
{
"epoch": 0.98,
"grad_norm": 0.9278403520584106,
"learning_rate": 7.843340875747785e-09,
"loss": 0.6282,
"step": 15511
},
{
"epoch": 0.98,
"grad_norm": 0.8793516159057617,
"learning_rate": 7.786000632035163e-09,
"loss": 0.5598,
"step": 15512
},
{
"epoch": 0.98,
"grad_norm": 0.8610735535621643,
"learning_rate": 7.728870593087246e-09,
"loss": 0.5739,
"step": 15513
},
{
"epoch": 0.98,
"grad_norm": 0.9375894665718079,
"learning_rate": 7.671950761309333e-09,
"loss": 0.5678,
"step": 15514
},
{
"epoch": 0.98,
"grad_norm": 0.8614829182624817,
"learning_rate": 7.61524113909895e-09,
"loss": 0.5117,
"step": 15515
},
{
"epoch": 0.98,
"grad_norm": 0.8722830414772034,
"learning_rate": 7.558741728843633e-09,
"loss": 0.5368,
"step": 15516
},
{
"epoch": 0.98,
"grad_norm": 0.9089422821998596,
"learning_rate": 7.502452532922033e-09,
"loss": 0.5445,
"step": 15517
},
{
"epoch": 0.98,
"grad_norm": 0.9493569135665894,
"learning_rate": 7.446373553705033e-09,
"loss": 0.601,
"step": 15518
},
{
"epoch": 0.98,
"grad_norm": 0.8956559896469116,
"learning_rate": 7.390504793552966e-09,
"loss": 0.6217,
"step": 15519
},
{
"epoch": 0.98,
"grad_norm": 0.8692125082015991,
"learning_rate": 7.3348462548183955e-09,
"loss": 0.587,
"step": 15520
},
{
"epoch": 0.98,
"grad_norm": 0.9651332497596741,
"learning_rate": 7.279397939845556e-09,
"loss": 0.5755,
"step": 15521
},
{
"epoch": 0.98,
"grad_norm": 0.9648064374923706,
"learning_rate": 7.2241598509686926e-09,
"loss": 0.5921,
"step": 15522
},
{
"epoch": 0.98,
"grad_norm": 0.8698114156723022,
"learning_rate": 7.169131990514278e-09,
"loss": 0.6096,
"step": 15523
},
{
"epoch": 0.98,
"grad_norm": 0.8747323751449585,
"learning_rate": 7.114314360798791e-09,
"loss": 0.6015,
"step": 15524
},
{
"epoch": 0.98,
"grad_norm": 0.8900646567344666,
"learning_rate": 7.0597069641303865e-09,
"loss": 0.5879,
"step": 15525
},
{
"epoch": 0.98,
"grad_norm": 0.9312129020690918,
"learning_rate": 7.00530980280889e-09,
"loss": 0.5577,
"step": 15526
},
{
"epoch": 0.98,
"grad_norm": 0.8599275946617126,
"learning_rate": 6.951122879124139e-09,
"loss": 0.541,
"step": 15527
},
{
"epoch": 0.98,
"grad_norm": 0.8398301601409912,
"learning_rate": 6.89714619535764e-09,
"loss": 0.5422,
"step": 15528
},
{
"epoch": 0.98,
"grad_norm": 0.8573556542396545,
"learning_rate": 6.84337975378313e-09,
"loss": 0.5242,
"step": 15529
},
{
"epoch": 0.98,
"grad_norm": 0.8811535239219666,
"learning_rate": 6.789823556663799e-09,
"loss": 0.571,
"step": 15530
},
{
"epoch": 0.98,
"grad_norm": 0.8871817588806152,
"learning_rate": 6.736477606255065e-09,
"loss": 0.5309,
"step": 15531
},
{
"epoch": 0.98,
"grad_norm": 0.8854000568389893,
"learning_rate": 6.683341904802909e-09,
"loss": 0.5643,
"step": 15532
},
{
"epoch": 0.98,
"grad_norm": 0.9325771927833557,
"learning_rate": 6.63041645454443e-09,
"loss": 0.5453,
"step": 15533
},
{
"epoch": 0.98,
"grad_norm": 0.9529849290847778,
"learning_rate": 6.577701257708957e-09,
"loss": 0.6482,
"step": 15534
},
{
"epoch": 0.98,
"grad_norm": 0.8886032700538635,
"learning_rate": 6.52519631651527e-09,
"loss": 0.5402,
"step": 15535
},
{
"epoch": 0.98,
"grad_norm": 0.933830201625824,
"learning_rate": 6.4729016331749325e-09,
"loss": 0.5783,
"step": 15536
},
{
"epoch": 0.98,
"grad_norm": 0.8807794451713562,
"learning_rate": 6.420817209888963e-09,
"loss": 0.5386,
"step": 15537
},
{
"epoch": 0.98,
"grad_norm": 0.8794984221458435,
"learning_rate": 6.368943048851162e-09,
"loss": 0.5568,
"step": 15538
},
{
"epoch": 0.98,
"grad_norm": 0.9522714018821716,
"learning_rate": 6.317279152245892e-09,
"loss": 0.555,
"step": 15539
},
{
"epoch": 0.98,
"grad_norm": 0.9179518222808838,
"learning_rate": 6.265825522248082e-09,
"loss": 0.5825,
"step": 15540
},
{
"epoch": 0.98,
"grad_norm": 0.8840945959091187,
"learning_rate": 6.2145821610243296e-09,
"loss": 0.5798,
"step": 15541
},
{
"epoch": 0.98,
"grad_norm": 0.9103140830993652,
"learning_rate": 6.163549070732356e-09,
"loss": 0.6138,
"step": 15542
},
{
"epoch": 0.98,
"grad_norm": 0.9227690696716309,
"learning_rate": 6.1127262535209955e-09,
"loss": 0.6251,
"step": 15543
},
{
"epoch": 0.98,
"grad_norm": 0.9462999105453491,
"learning_rate": 6.062113711530204e-09,
"loss": 0.5747,
"step": 15544
},
{
"epoch": 0.98,
"grad_norm": 0.9513913989067078,
"learning_rate": 6.01171144689161e-09,
"loss": 0.5616,
"step": 15545
},
{
"epoch": 0.98,
"grad_norm": 0.9170199036598206,
"learning_rate": 5.96151946172685e-09,
"loss": 0.6263,
"step": 15546
},
{
"epoch": 0.98,
"grad_norm": 0.8427810668945312,
"learning_rate": 5.911537758149233e-09,
"loss": 0.538,
"step": 15547
},
{
"epoch": 0.99,
"grad_norm": 0.8685246109962463,
"learning_rate": 5.861766338263741e-09,
"loss": 0.5725,
"step": 15548
},
{
"epoch": 0.99,
"grad_norm": 0.9405071139335632,
"learning_rate": 5.812205204165922e-09,
"loss": 0.5458,
"step": 15549
},
{
"epoch": 0.99,
"grad_norm": 0.933382511138916,
"learning_rate": 5.762854357942993e-09,
"loss": 0.6187,
"step": 15550
},
{
"epoch": 0.99,
"grad_norm": 0.8507725596427917,
"learning_rate": 5.7137138016721825e-09,
"loss": 0.529,
"step": 15551
},
{
"epoch": 0.99,
"grad_norm": 0.827217698097229,
"learning_rate": 5.6647835374229465e-09,
"loss": 0.5867,
"step": 15552
},
{
"epoch": 0.99,
"grad_norm": 0.8920066356658936,
"learning_rate": 5.616063567255859e-09,
"loss": 0.6224,
"step": 15553
},
{
"epoch": 0.99,
"grad_norm": 0.9211912751197815,
"learning_rate": 5.5675538932220555e-09,
"loss": 0.6406,
"step": 15554
},
{
"epoch": 0.99,
"grad_norm": 0.8937119245529175,
"learning_rate": 5.519254517364347e-09,
"loss": 0.526,
"step": 15555
},
{
"epoch": 0.99,
"grad_norm": 0.9318338632583618,
"learning_rate": 5.471165441716108e-09,
"loss": 0.5555,
"step": 15556
},
{
"epoch": 0.99,
"grad_norm": 0.8785502910614014,
"learning_rate": 5.4232866683023856e-09,
"loss": 0.5905,
"step": 15557
},
{
"epoch": 0.99,
"grad_norm": 0.870749831199646,
"learning_rate": 5.375618199139343e-09,
"loss": 0.539,
"step": 15558
},
{
"epoch": 0.99,
"grad_norm": 0.923859179019928,
"learning_rate": 5.328160036234264e-09,
"loss": 0.6001,
"step": 15559
},
{
"epoch": 0.99,
"grad_norm": 0.9950880408287048,
"learning_rate": 5.280912181584441e-09,
"loss": 0.6232,
"step": 15560
},
{
"epoch": 0.99,
"grad_norm": 0.887122392654419,
"learning_rate": 5.233874637180503e-09,
"loss": 0.6034,
"step": 15561
},
{
"epoch": 0.99,
"grad_norm": 0.8819499015808105,
"learning_rate": 5.1870474050025325e-09,
"loss": 0.5857,
"step": 15562
},
{
"epoch": 0.99,
"grad_norm": 0.859145998954773,
"learning_rate": 5.1404304870222856e-09,
"loss": 0.5484,
"step": 15563
},
{
"epoch": 0.99,
"grad_norm": 0.8710299134254456,
"learning_rate": 5.094023885203192e-09,
"loss": 0.5355,
"step": 15564
},
{
"epoch": 0.99,
"grad_norm": 0.8982166647911072,
"learning_rate": 5.0478276014981345e-09,
"loss": 0.6073,
"step": 15565
},
{
"epoch": 0.99,
"grad_norm": 0.981425404548645,
"learning_rate": 5.001841637852778e-09,
"loss": 0.5632,
"step": 15566
},
{
"epoch": 0.99,
"grad_norm": 0.865263044834137,
"learning_rate": 4.956065996203907e-09,
"loss": 0.5613,
"step": 15567
},
{
"epoch": 0.99,
"grad_norm": 0.9212016463279724,
"learning_rate": 4.910500678478314e-09,
"loss": 0.5628,
"step": 15568
},
{
"epoch": 0.99,
"grad_norm": 0.9121674299240112,
"learning_rate": 4.865145686595019e-09,
"loss": 0.5908,
"step": 15569
},
{
"epoch": 0.99,
"grad_norm": 0.8445576429367065,
"learning_rate": 4.820001022463605e-09,
"loss": 0.5616,
"step": 15570
},
{
"epoch": 0.99,
"grad_norm": 0.9339314699172974,
"learning_rate": 4.77506668798533e-09,
"loss": 0.5892,
"step": 15571
},
{
"epoch": 0.99,
"grad_norm": 0.8746134638786316,
"learning_rate": 4.730342685051459e-09,
"loss": 0.574,
"step": 15572
},
{
"epoch": 0.99,
"grad_norm": 0.9173433780670166,
"learning_rate": 4.685829015545485e-09,
"loss": 0.5736,
"step": 15573
},
{
"epoch": 0.99,
"grad_norm": 0.9090994596481323,
"learning_rate": 4.641525681342019e-09,
"loss": 0.5846,
"step": 15574
},
{
"epoch": 0.99,
"grad_norm": 0.9413917064666748,
"learning_rate": 4.597432684306236e-09,
"loss": 0.6241,
"step": 15575
},
{
"epoch": 0.99,
"grad_norm": 0.9874243140220642,
"learning_rate": 4.553550026294984e-09,
"loss": 0.6424,
"step": 15576
},
{
"epoch": 0.99,
"grad_norm": 0.8794564604759216,
"learning_rate": 4.5098777091556745e-09,
"loss": 0.5624,
"step": 15577
},
{
"epoch": 0.99,
"grad_norm": 0.8835657238960266,
"learning_rate": 4.4664157347273916e-09,
"loss": 0.5964,
"step": 15578
},
{
"epoch": 0.99,
"grad_norm": 0.8630079627037048,
"learning_rate": 4.423164104840339e-09,
"loss": 0.5759,
"step": 15579
},
{
"epoch": 0.99,
"grad_norm": 0.8491309285163879,
"learning_rate": 4.38012282131528e-09,
"loss": 0.539,
"step": 15580
},
{
"epoch": 0.99,
"grad_norm": 0.8870819807052612,
"learning_rate": 4.3372918859652115e-09,
"loss": 0.5754,
"step": 15581
},
{
"epoch": 0.99,
"grad_norm": 0.9265652894973755,
"learning_rate": 4.294671300592579e-09,
"loss": 0.5735,
"step": 15582
},
{
"epoch": 0.99,
"grad_norm": 0.8699434995651245,
"learning_rate": 4.252261066993169e-09,
"loss": 0.5775,
"step": 15583
},
{
"epoch": 0.99,
"grad_norm": 0.9221080541610718,
"learning_rate": 4.210061186951664e-09,
"loss": 0.6277,
"step": 15584
},
{
"epoch": 0.99,
"grad_norm": 0.9161962866783142,
"learning_rate": 4.168071662245532e-09,
"loss": 0.5884,
"step": 15585
},
{
"epoch": 0.99,
"grad_norm": 0.9073721170425415,
"learning_rate": 4.1262924946422476e-09,
"loss": 0.5669,
"step": 15586
},
{
"epoch": 0.99,
"grad_norm": 0.8920649290084839,
"learning_rate": 4.084723685901515e-09,
"loss": 0.608,
"step": 15587
},
{
"epoch": 0.99,
"grad_norm": 0.8832874298095703,
"learning_rate": 4.043365237774155e-09,
"loss": 0.5981,
"step": 15588
},
{
"epoch": 0.99,
"grad_norm": 0.9273045063018799,
"learning_rate": 4.002217152000443e-09,
"loss": 0.574,
"step": 15589
},
{
"epoch": 0.99,
"grad_norm": 0.8001242280006409,
"learning_rate": 3.961279430313991e-09,
"loss": 0.465,
"step": 15590
},
{
"epoch": 0.99,
"grad_norm": 0.900482714176178,
"learning_rate": 3.920552074437867e-09,
"loss": 0.5724,
"step": 15591
},
{
"epoch": 0.99,
"grad_norm": 0.9371671080589294,
"learning_rate": 3.880035086086808e-09,
"loss": 0.5378,
"step": 15592
},
{
"epoch": 0.99,
"grad_norm": 0.8328535556793213,
"learning_rate": 3.839728466967785e-09,
"loss": 0.5461,
"step": 15593
},
{
"epoch": 0.99,
"grad_norm": 0.9578930139541626,
"learning_rate": 3.799632218777216e-09,
"loss": 0.5931,
"step": 15594
},
{
"epoch": 0.99,
"grad_norm": 0.9057374596595764,
"learning_rate": 3.759746343203751e-09,
"loss": 0.5542,
"step": 15595
},
{
"epoch": 0.99,
"grad_norm": 0.9067438840866089,
"learning_rate": 3.720070841926604e-09,
"loss": 0.5555,
"step": 15596
},
{
"epoch": 0.99,
"grad_norm": 0.8708641529083252,
"learning_rate": 3.6806057166166585e-09,
"loss": 0.5649,
"step": 15597
},
{
"epoch": 0.99,
"grad_norm": 0.857673168182373,
"learning_rate": 3.6413509689353644e-09,
"loss": 0.5904,
"step": 15598
},
{
"epoch": 0.99,
"grad_norm": 0.9187641739845276,
"learning_rate": 3.602306600535843e-09,
"loss": 0.573,
"step": 15599
},
{
"epoch": 0.99,
"grad_norm": 0.9030482172966003,
"learning_rate": 3.5634726130617802e-09,
"loss": 0.6042,
"step": 15600
},
{
"epoch": 0.99,
"grad_norm": 0.814935028553009,
"learning_rate": 3.5248490081485343e-09,
"loss": 0.5411,
"step": 15601
},
{
"epoch": 0.99,
"grad_norm": 0.8016582131385803,
"learning_rate": 3.486435787422582e-09,
"loss": 0.5426,
"step": 15602
},
{
"epoch": 0.99,
"grad_norm": 0.8617429733276367,
"learning_rate": 3.4482329525009627e-09,
"loss": 0.5523,
"step": 15603
},
{
"epoch": 0.99,
"grad_norm": 0.8524816632270813,
"learning_rate": 3.4102405049929455e-09,
"loss": 0.6124,
"step": 15604
},
{
"epoch": 0.99,
"grad_norm": 0.8613601922988892,
"learning_rate": 3.372458446497251e-09,
"loss": 0.5662,
"step": 15605
},
{
"epoch": 0.99,
"grad_norm": 0.9535294771194458,
"learning_rate": 3.3348867786059393e-09,
"loss": 0.5969,
"step": 15606
},
{
"epoch": 0.99,
"grad_norm": 0.8776538372039795,
"learning_rate": 3.2975255028999675e-09,
"loss": 0.5582,
"step": 15607
},
{
"epoch": 0.99,
"grad_norm": 0.9301447868347168,
"learning_rate": 3.2603746209530774e-09,
"loss": 0.5439,
"step": 15608
},
{
"epoch": 0.99,
"grad_norm": 0.8789377212524414,
"learning_rate": 3.223434134329573e-09,
"loss": 0.5597,
"step": 15609
},
{
"epoch": 0.99,
"grad_norm": 0.9617857336997986,
"learning_rate": 3.1867040445848764e-09,
"loss": 0.5971,
"step": 15610
},
{
"epoch": 0.99,
"grad_norm": 0.9199445843696594,
"learning_rate": 3.1501843532649734e-09,
"loss": 0.51,
"step": 15611
},
{
"epoch": 0.99,
"grad_norm": 0.9115186929702759,
"learning_rate": 3.113875061908078e-09,
"loss": 0.5553,
"step": 15612
},
{
"epoch": 0.99,
"grad_norm": 0.9086104035377502,
"learning_rate": 3.077776172043523e-09,
"loss": 0.5724,
"step": 15613
},
{
"epoch": 0.99,
"grad_norm": 0.9141691327095032,
"learning_rate": 3.0418876851900924e-09,
"loss": 0.5002,
"step": 15614
},
{
"epoch": 0.99,
"grad_norm": 0.8640308380126953,
"learning_rate": 3.0062096028599108e-09,
"loss": 0.511,
"step": 15615
},
{
"epoch": 0.99,
"grad_norm": 0.8536925911903381,
"learning_rate": 2.9707419265551097e-09,
"loss": 0.5577,
"step": 15616
},
{
"epoch": 0.99,
"grad_norm": 0.9029596447944641,
"learning_rate": 2.9354846577689387e-09,
"loss": 0.6127,
"step": 15617
},
{
"epoch": 0.99,
"grad_norm": 0.877974271774292,
"learning_rate": 2.900437797986322e-09,
"loss": 0.5218,
"step": 15618
},
{
"epoch": 0.99,
"grad_norm": 0.8660022616386414,
"learning_rate": 2.8656013486821897e-09,
"loss": 0.58,
"step": 15619
},
{
"epoch": 0.99,
"grad_norm": 0.8560828566551208,
"learning_rate": 2.8309753113237025e-09,
"loss": 0.5314,
"step": 15620
},
{
"epoch": 0.99,
"grad_norm": 0.8698206543922424,
"learning_rate": 2.796559687369138e-09,
"loss": 0.5735,
"step": 15621
},
{
"epoch": 0.99,
"grad_norm": 0.8277180790901184,
"learning_rate": 2.7623544782673372e-09,
"loss": 0.5116,
"step": 15622
},
{
"epoch": 0.99,
"grad_norm": 0.8713210225105286,
"learning_rate": 2.7283596854588148e-09,
"loss": 0.583,
"step": 15623
},
{
"epoch": 0.99,
"grad_norm": 0.931766927242279,
"learning_rate": 2.6945753103746475e-09,
"loss": 0.5452,
"step": 15624
},
{
"epoch": 0.99,
"grad_norm": 0.9144072532653809,
"learning_rate": 2.661001354437587e-09,
"loss": 0.5662,
"step": 15625
},
{
"epoch": 0.99,
"grad_norm": 0.9093576073646545,
"learning_rate": 2.6276378190615016e-09,
"loss": 0.56,
"step": 15626
},
{
"epoch": 0.99,
"grad_norm": 0.9563875198364258,
"learning_rate": 2.5944847056508237e-09,
"loss": 0.5346,
"step": 15627
},
{
"epoch": 0.99,
"grad_norm": 0.9016739726066589,
"learning_rate": 2.561542015601659e-09,
"loss": 0.6267,
"step": 15628
},
{
"epoch": 0.99,
"grad_norm": 0.9251307249069214,
"learning_rate": 2.528809750301231e-09,
"loss": 0.5576,
"step": 15629
},
{
"epoch": 0.99,
"grad_norm": 0.9503340721130371,
"learning_rate": 2.4962879111278813e-09,
"loss": 0.5327,
"step": 15630
},
{
"epoch": 0.99,
"grad_norm": 0.8992822766304016,
"learning_rate": 2.4639764994505156e-09,
"loss": 0.5629,
"step": 15631
},
{
"epoch": 0.99,
"grad_norm": 0.8664157390594482,
"learning_rate": 2.4318755166302668e-09,
"loss": 0.5529,
"step": 15632
},
{
"epoch": 0.99,
"grad_norm": 0.837928831577301,
"learning_rate": 2.399984964018276e-09,
"loss": 0.5679,
"step": 15633
},
{
"epoch": 0.99,
"grad_norm": 0.8134336471557617,
"learning_rate": 2.3683048429573587e-09,
"loss": 0.5256,
"step": 15634
},
{
"epoch": 0.99,
"grad_norm": 0.9103096127510071,
"learning_rate": 2.3368351547820023e-09,
"loss": 0.58,
"step": 15635
},
{
"epoch": 0.99,
"grad_norm": 0.8657124638557434,
"learning_rate": 2.3055759008167033e-09,
"loss": 0.5196,
"step": 15636
},
{
"epoch": 0.99,
"grad_norm": 0.8632974028587341,
"learning_rate": 2.2745270823776312e-09,
"loss": 0.5559,
"step": 15637
},
{
"epoch": 0.99,
"grad_norm": 0.9439393877983093,
"learning_rate": 2.243688700772628e-09,
"loss": 0.6166,
"step": 15638
},
{
"epoch": 0.99,
"grad_norm": 0.8731262683868408,
"learning_rate": 2.2130607573001006e-09,
"loss": 0.6254,
"step": 15639
},
{
"epoch": 0.99,
"grad_norm": 0.867850124835968,
"learning_rate": 2.1826432532495724e-09,
"loss": 0.5505,
"step": 15640
},
{
"epoch": 0.99,
"grad_norm": 0.9074274897575378,
"learning_rate": 2.1524361899016853e-09,
"loss": 0.581,
"step": 15641
},
{
"epoch": 0.99,
"grad_norm": 0.8568212389945984,
"learning_rate": 2.1224395685282008e-09,
"loss": 0.6037,
"step": 15642
},
{
"epoch": 0.99,
"grad_norm": 0.8557693362236023,
"learning_rate": 2.0926533903925516e-09,
"loss": 0.5362,
"step": 15643
},
{
"epoch": 0.99,
"grad_norm": 0.8828796744346619,
"learning_rate": 2.0630776567492904e-09,
"loss": 0.573,
"step": 15644
},
{
"epoch": 0.99,
"grad_norm": 0.8889597058296204,
"learning_rate": 2.033712368842977e-09,
"loss": 0.5788,
"step": 15645
},
{
"epoch": 0.99,
"grad_norm": 0.8992346525192261,
"learning_rate": 2.004557527909845e-09,
"loss": 0.5368,
"step": 15646
},
{
"epoch": 0.99,
"grad_norm": 0.8839378356933594,
"learning_rate": 1.975613135178911e-09,
"loss": 0.5774,
"step": 15647
},
{
"epoch": 0.99,
"grad_norm": 0.9571453928947449,
"learning_rate": 1.9468791918675345e-09,
"loss": 0.5711,
"step": 15648
},
{
"epoch": 0.99,
"grad_norm": 0.8286476731300354,
"learning_rate": 1.918355699186414e-09,
"loss": 0.5746,
"step": 15649
},
{
"epoch": 0.99,
"grad_norm": 0.8622028827667236,
"learning_rate": 1.8900426583357003e-09,
"loss": 0.4927,
"step": 15650
},
{
"epoch": 0.99,
"grad_norm": 0.818505048751831,
"learning_rate": 1.861940070508883e-09,
"loss": 0.5347,
"step": 15651
},
{
"epoch": 0.99,
"grad_norm": 0.9037706851959229,
"learning_rate": 1.8340479368883502e-09,
"loss": 0.5652,
"step": 15652
},
{
"epoch": 0.99,
"grad_norm": 0.8619953989982605,
"learning_rate": 1.8063662586481622e-09,
"loss": 0.5376,
"step": 15653
},
{
"epoch": 0.99,
"grad_norm": 0.8764271140098572,
"learning_rate": 1.7788950369551638e-09,
"loss": 0.544,
"step": 15654
},
{
"epoch": 0.99,
"grad_norm": 0.9021615386009216,
"learning_rate": 1.751634272964542e-09,
"loss": 0.5352,
"step": 15655
},
{
"epoch": 0.99,
"grad_norm": 0.9306265115737915,
"learning_rate": 1.7245839678259323e-09,
"loss": 0.549,
"step": 15656
},
{
"epoch": 0.99,
"grad_norm": 0.9229673743247986,
"learning_rate": 1.6977441226767589e-09,
"loss": 0.6132,
"step": 15657
},
{
"epoch": 0.99,
"grad_norm": 0.8322945237159729,
"learning_rate": 1.6711147386477833e-09,
"loss": 0.4952,
"step": 15658
},
{
"epoch": 0.99,
"grad_norm": 0.9024680852890015,
"learning_rate": 1.644695816860331e-09,
"loss": 0.5401,
"step": 15659
},
{
"epoch": 0.99,
"grad_norm": 0.86787348985672,
"learning_rate": 1.6184873584268457e-09,
"loss": 0.6113,
"step": 15660
},
{
"epoch": 0.99,
"grad_norm": 0.9406611323356628,
"learning_rate": 1.5924893644503336e-09,
"loss": 0.6352,
"step": 15661
},
{
"epoch": 0.99,
"grad_norm": 0.9413818120956421,
"learning_rate": 1.566701836026585e-09,
"loss": 0.5955,
"step": 15662
},
{
"epoch": 0.99,
"grad_norm": 0.8670917749404907,
"learning_rate": 1.541124774240288e-09,
"loss": 0.5796,
"step": 15663
},
{
"epoch": 0.99,
"grad_norm": 0.850266695022583,
"learning_rate": 1.5157581801689137e-09,
"loss": 0.5393,
"step": 15664
},
{
"epoch": 0.99,
"grad_norm": 0.9164510369300842,
"learning_rate": 1.4906020548804968e-09,
"loss": 0.5778,
"step": 15665
},
{
"epoch": 0.99,
"grad_norm": 0.8657138347625732,
"learning_rate": 1.4656563994341898e-09,
"loss": 0.5405,
"step": 15666
},
{
"epoch": 0.99,
"grad_norm": 0.925816535949707,
"learning_rate": 1.4409212148802643e-09,
"loss": 0.6178,
"step": 15667
},
{
"epoch": 0.99,
"grad_norm": 0.8860337138175964,
"learning_rate": 1.416396502260664e-09,
"loss": 0.5587,
"step": 15668
},
{
"epoch": 0.99,
"grad_norm": 0.9135521054267883,
"learning_rate": 1.3920822626078967e-09,
"loss": 0.5695,
"step": 15669
},
{
"epoch": 0.99,
"grad_norm": 0.8713152408599854,
"learning_rate": 1.3679784969461429e-09,
"loss": 0.601,
"step": 15670
},
{
"epoch": 0.99,
"grad_norm": 0.8884409666061401,
"learning_rate": 1.3440852062890364e-09,
"loss": 0.5773,
"step": 15671
},
{
"epoch": 0.99,
"grad_norm": 0.8426517248153687,
"learning_rate": 1.3204023916435494e-09,
"loss": 0.5539,
"step": 15672
},
{
"epoch": 0.99,
"grad_norm": 0.9166735410690308,
"learning_rate": 1.2969300540072171e-09,
"loss": 0.5526,
"step": 15673
},
{
"epoch": 0.99,
"grad_norm": 0.8987283706665039,
"learning_rate": 1.2736681943675833e-09,
"loss": 0.562,
"step": 15674
},
{
"epoch": 0.99,
"grad_norm": 0.8762261867523193,
"learning_rate": 1.2506168137049747e-09,
"loss": 0.5946,
"step": 15675
},
{
"epoch": 0.99,
"grad_norm": 0.8147190809249878,
"learning_rate": 1.2277759129886158e-09,
"loss": 0.5149,
"step": 15676
},
{
"epoch": 0.99,
"grad_norm": 0.8669753670692444,
"learning_rate": 1.2051454931816254e-09,
"loss": 0.544,
"step": 15677
},
{
"epoch": 0.99,
"grad_norm": 0.9210073351860046,
"learning_rate": 1.1827255552365745e-09,
"loss": 0.5321,
"step": 15678
},
{
"epoch": 0.99,
"grad_norm": 0.9330329895019531,
"learning_rate": 1.1605161000971532e-09,
"loss": 0.5983,
"step": 15679
},
{
"epoch": 0.99,
"grad_norm": 0.8832536935806274,
"learning_rate": 1.1385171286992791e-09,
"loss": 0.5729,
"step": 15680
},
{
"epoch": 0.99,
"grad_norm": 0.8871389627456665,
"learning_rate": 1.116728641967768e-09,
"loss": 0.6509,
"step": 15681
},
{
"epoch": 0.99,
"grad_norm": 0.9270039200782776,
"learning_rate": 1.0951506408213298e-09,
"loss": 0.588,
"step": 15682
},
{
"epoch": 0.99,
"grad_norm": 0.8575807213783264,
"learning_rate": 1.0737831261686815e-09,
"loss": 0.6011,
"step": 15683
},
{
"epoch": 0.99,
"grad_norm": 0.8653765320777893,
"learning_rate": 1.052626098907994e-09,
"loss": 0.546,
"step": 15684
},
{
"epoch": 0.99,
"grad_norm": 0.9024002552032471,
"learning_rate": 1.0316795599318862e-09,
"loss": 0.5587,
"step": 15685
},
{
"epoch": 0.99,
"grad_norm": 0.9174841046333313,
"learning_rate": 1.0109435101218757e-09,
"loss": 0.5712,
"step": 15686
},
{
"epoch": 0.99,
"grad_norm": 0.9049075841903687,
"learning_rate": 9.90417950350042e-10,
"loss": 0.5595,
"step": 15687
},
{
"epoch": 0.99,
"grad_norm": 0.873509407043457,
"learning_rate": 9.701028814818047e-10,
"loss": 0.5713,
"step": 15688
},
{
"epoch": 0.99,
"grad_norm": 0.9006879329681396,
"learning_rate": 9.499983043720351e-10,
"loss": 0.5857,
"step": 15689
},
{
"epoch": 0.99,
"grad_norm": 0.9213382601737976,
"learning_rate": 9.301042198678334e-10,
"loss": 0.5795,
"step": 15690
},
{
"epoch": 0.99,
"grad_norm": 0.9829197525978088,
"learning_rate": 9.104206288057527e-10,
"loss": 0.5673,
"step": 15691
},
{
"epoch": 0.99,
"grad_norm": 0.8557707071304321,
"learning_rate": 8.909475320156846e-10,
"loss": 0.5127,
"step": 15692
},
{
"epoch": 0.99,
"grad_norm": 0.9053747057914734,
"learning_rate": 8.71684930317529e-10,
"loss": 0.5696,
"step": 15693
},
{
"epoch": 0.99,
"grad_norm": 0.8902594447135925,
"learning_rate": 8.526328245217485e-10,
"loss": 0.5673,
"step": 15694
},
{
"epoch": 0.99,
"grad_norm": 0.86614990234375,
"learning_rate": 8.337912154304795e-10,
"loss": 0.5927,
"step": 15695
},
{
"epoch": 0.99,
"grad_norm": 0.8675297498703003,
"learning_rate": 8.151601038375312e-10,
"loss": 0.5788,
"step": 15696
},
{
"epoch": 0.99,
"grad_norm": 0.8987656831741333,
"learning_rate": 7.967394905278314e-10,
"loss": 0.5593,
"step": 15697
},
{
"epoch": 0.99,
"grad_norm": 0.8395030498504639,
"learning_rate": 7.785293762757607e-10,
"loss": 0.5587,
"step": 15698
},
{
"epoch": 0.99,
"grad_norm": 0.8946781754493713,
"learning_rate": 7.605297618495932e-10,
"loss": 0.6318,
"step": 15699
},
{
"epoch": 0.99,
"grad_norm": 0.917945384979248,
"learning_rate": 7.427406480059463e-10,
"loss": 0.6166,
"step": 15700
},
{
"epoch": 0.99,
"grad_norm": 0.8553743362426758,
"learning_rate": 7.251620354942201e-10,
"loss": 0.5548,
"step": 15701
},
{
"epoch": 0.99,
"grad_norm": 0.9134872555732727,
"learning_rate": 7.077939250549337e-10,
"loss": 0.5561,
"step": 15702
},
{
"epoch": 0.99,
"grad_norm": 0.8560099601745605,
"learning_rate": 6.906363174191688e-10,
"loss": 0.5588,
"step": 15703
},
{
"epoch": 0.99,
"grad_norm": 0.8506429195404053,
"learning_rate": 6.736892133091255e-10,
"loss": 0.496,
"step": 15704
},
{
"epoch": 0.99,
"grad_norm": 0.8329866528511047,
"learning_rate": 6.569526134392324e-10,
"loss": 0.4726,
"step": 15705
},
{
"epoch": 1.0,
"grad_norm": 0.8823480606079102,
"learning_rate": 6.404265185128155e-10,
"loss": 0.6387,
"step": 15706
},
{
"epoch": 1.0,
"grad_norm": 0.8894028067588806,
"learning_rate": 6.241109292270953e-10,
"loss": 0.5589,
"step": 15707
},
{
"epoch": 1.0,
"grad_norm": 0.9294013977050781,
"learning_rate": 6.080058462687444e-10,
"loss": 0.5855,
"step": 15708
},
{
"epoch": 1.0,
"grad_norm": 0.8650762438774109,
"learning_rate": 5.92111270314999e-10,
"loss": 0.5452,
"step": 15709
},
{
"epoch": 1.0,
"grad_norm": 0.8934659361839294,
"learning_rate": 5.764272020358785e-10,
"loss": 0.5798,
"step": 15710
},
{
"epoch": 1.0,
"grad_norm": 1.0057034492492676,
"learning_rate": 5.609536420919659e-10,
"loss": 0.5901,
"step": 15711
},
{
"epoch": 1.0,
"grad_norm": 0.9346972703933716,
"learning_rate": 5.456905911344068e-10,
"loss": 0.5695,
"step": 15712
},
{
"epoch": 1.0,
"grad_norm": 0.9616386294364929,
"learning_rate": 5.306380498060204e-10,
"loss": 0.5871,
"step": 15713
},
{
"epoch": 1.0,
"grad_norm": 0.8900029063224792,
"learning_rate": 5.15796018740744e-10,
"loss": 0.5773,
"step": 15714
},
{
"epoch": 1.0,
"grad_norm": 0.9126656651496887,
"learning_rate": 5.011644985630781e-10,
"loss": 0.5591,
"step": 15715
},
{
"epoch": 1.0,
"grad_norm": 0.9090896248817444,
"learning_rate": 4.867434898891965e-10,
"loss": 0.5804,
"step": 15716
},
{
"epoch": 1.0,
"grad_norm": 0.8257348537445068,
"learning_rate": 4.725329933269463e-10,
"loss": 0.5875,
"step": 15717
},
{
"epoch": 1.0,
"grad_norm": 0.9159669876098633,
"learning_rate": 4.5853300947418247e-10,
"loss": 0.5699,
"step": 15718
},
{
"epoch": 1.0,
"grad_norm": 0.8885412812232971,
"learning_rate": 4.4474353892043356e-10,
"loss": 0.615,
"step": 15719
},
{
"epoch": 1.0,
"grad_norm": 0.8214197158813477,
"learning_rate": 4.311645822463462e-10,
"loss": 0.5229,
"step": 15720
},
{
"epoch": 1.0,
"grad_norm": 0.9046826362609863,
"learning_rate": 4.177961400236852e-10,
"loss": 0.5779,
"step": 15721
},
{
"epoch": 1.0,
"grad_norm": 0.8636698722839355,
"learning_rate": 4.046382128147786e-10,
"loss": 0.5556,
"step": 15722
},
{
"epoch": 1.0,
"grad_norm": 1.039339542388916,
"learning_rate": 3.916908011747378e-10,
"loss": 0.6415,
"step": 15723
},
{
"epoch": 1.0,
"grad_norm": 0.8799046874046326,
"learning_rate": 3.7895390564868237e-10,
"loss": 0.5939,
"step": 15724
},
{
"epoch": 1.0,
"grad_norm": 0.855849027633667,
"learning_rate": 3.664275267717399e-10,
"loss": 0.5676,
"step": 15725
},
{
"epoch": 1.0,
"grad_norm": 0.8732782602310181,
"learning_rate": 3.541116650723764e-10,
"loss": 0.5287,
"step": 15726
},
{
"epoch": 1.0,
"grad_norm": 0.8243375420570374,
"learning_rate": 3.4200632106906605e-10,
"loss": 0.5535,
"step": 15727
},
{
"epoch": 1.0,
"grad_norm": 0.8324832320213318,
"learning_rate": 3.301114952708462e-10,
"loss": 0.5476,
"step": 15728
},
{
"epoch": 1.0,
"grad_norm": 0.9179962873458862,
"learning_rate": 3.1842718817953755e-10,
"loss": 0.6169,
"step": 15729
},
{
"epoch": 1.0,
"grad_norm": 0.9185804724693298,
"learning_rate": 3.0695340028641383e-10,
"loss": 0.6479,
"step": 15730
},
{
"epoch": 1.0,
"grad_norm": 0.9088355302810669,
"learning_rate": 2.956901320744221e-10,
"loss": 0.5929,
"step": 15731
},
{
"epoch": 1.0,
"grad_norm": 0.8863728642463684,
"learning_rate": 2.8463738401873776e-10,
"loss": 0.5875,
"step": 15732
},
{
"epoch": 1.0,
"grad_norm": 0.9010648727416992,
"learning_rate": 2.7379515658398915e-10,
"loss": 0.5564,
"step": 15733
},
{
"epoch": 1.0,
"grad_norm": 0.9122373461723328,
"learning_rate": 2.6316345022703307e-10,
"loss": 0.5589,
"step": 15734
},
{
"epoch": 1.0,
"grad_norm": 0.9271931052207947,
"learning_rate": 2.5274226539584445e-10,
"loss": 0.5492,
"step": 15735
},
{
"epoch": 1.0,
"grad_norm": 0.8589327931404114,
"learning_rate": 2.4253160252840636e-10,
"loss": 0.4886,
"step": 15736
},
{
"epoch": 1.0,
"grad_norm": 0.9020355343818665,
"learning_rate": 2.3253146205493014e-10,
"loss": 0.5461,
"step": 15737
},
{
"epoch": 1.0,
"grad_norm": 0.8690382838249207,
"learning_rate": 2.227418443967455e-10,
"loss": 0.5784,
"step": 15738
},
{
"epoch": 1.0,
"grad_norm": 0.9112160801887512,
"learning_rate": 2.1316274996630026e-10,
"loss": 0.6373,
"step": 15739
},
{
"epoch": 1.0,
"grad_norm": 0.8418201208114624,
"learning_rate": 2.0379417916605027e-10,
"loss": 0.5148,
"step": 15740
},
{
"epoch": 1.0,
"grad_norm": 0.9191485047340393,
"learning_rate": 1.946361323912349e-10,
"loss": 0.5662,
"step": 15741
},
{
"epoch": 1.0,
"grad_norm": 0.9604305028915405,
"learning_rate": 1.856886100276567e-10,
"loss": 0.5844,
"step": 15742
},
{
"epoch": 1.0,
"grad_norm": 0.8791788816452026,
"learning_rate": 1.7695161245112613e-10,
"loss": 0.5593,
"step": 15743
},
{
"epoch": 1.0,
"grad_norm": 0.9725036025047302,
"learning_rate": 1.6842514003023724e-10,
"loss": 0.5887,
"step": 15744
},
{
"epoch": 1.0,
"grad_norm": 0.8569048047065735,
"learning_rate": 1.6010919312359208e-10,
"loss": 0.5369,
"step": 15745
},
{
"epoch": 1.0,
"grad_norm": 0.8856031894683838,
"learning_rate": 1.520037720820211e-10,
"loss": 0.5598,
"step": 15746
},
{
"epoch": 1.0,
"grad_norm": 0.8477560877799988,
"learning_rate": 1.4410887724580768e-10,
"loss": 0.5036,
"step": 15747
},
{
"epoch": 1.0,
"grad_norm": 0.8959560394287109,
"learning_rate": 1.3642450894801872e-10,
"loss": 0.5094,
"step": 15748
},
{
"epoch": 1.0,
"grad_norm": 0.896802544593811,
"learning_rate": 1.2895066751283935e-10,
"loss": 0.5458,
"step": 15749
},
{
"epoch": 1.0,
"grad_norm": 0.8456780910491943,
"learning_rate": 1.2168735325335246e-10,
"loss": 0.5843,
"step": 15750
},
{
"epoch": 1.0,
"grad_norm": 0.8784763813018799,
"learning_rate": 1.1463456647708982e-10,
"loss": 0.5836,
"step": 15751
},
{
"epoch": 1.0,
"grad_norm": 0.900086522102356,
"learning_rate": 1.0779230747992587e-10,
"loss": 0.583,
"step": 15752
},
{
"epoch": 1.0,
"grad_norm": 0.9559805393218994,
"learning_rate": 1.0116057654996348e-10,
"loss": 0.6462,
"step": 15753
},
{
"epoch": 1.0,
"grad_norm": 0.9030122756958008,
"learning_rate": 9.473937396697885e-11,
"loss": 0.5079,
"step": 15754
},
{
"epoch": 1.0,
"grad_norm": 0.8667322993278503,
"learning_rate": 8.85287000013113e-11,
"loss": 0.5711,
"step": 15755
},
{
"epoch": 1.0,
"grad_norm": 0.9083278179168701,
"learning_rate": 8.252855491386325e-11,
"loss": 0.5124,
"step": 15756
},
{
"epoch": 1.0,
"grad_norm": 0.8988203406333923,
"learning_rate": 7.673893895776551e-11,
"loss": 0.5766,
"step": 15757
},
{
"epoch": 1.0,
"grad_norm": 0.9265889525413513,
"learning_rate": 7.115985237726719e-11,
"loss": 0.605,
"step": 15758
},
{
"epoch": 1.0,
"grad_norm": 0.8614688515663147,
"learning_rate": 6.57912954060702e-11,
"loss": 0.5763,
"step": 15759
},
{
"epoch": 1.0,
"grad_norm": 0.8665549755096436,
"learning_rate": 6.063326827121518e-11,
"loss": 0.5447,
"step": 15760
},
{
"epoch": 1.0,
"grad_norm": 0.9184945225715637,
"learning_rate": 5.5685771189750714e-11,
"loss": 0.5477,
"step": 15761
},
{
"epoch": 1.0,
"grad_norm": 0.9115839004516602,
"learning_rate": 5.0948804369843616e-11,
"loss": 0.5807,
"step": 15762
},
{
"epoch": 1.0,
"grad_norm": 0.865247905254364,
"learning_rate": 4.642236801022382e-11,
"loss": 0.59,
"step": 15763
},
{
"epoch": 1.0,
"grad_norm": 0.9395208358764648,
"learning_rate": 4.210646230295989e-11,
"loss": 0.553,
"step": 15764
},
{
"epoch": 1.0,
"grad_norm": 0.8607648611068726,
"learning_rate": 3.800108742846309e-11,
"loss": 0.5839,
"step": 15765
},
{
"epoch": 1.0,
"grad_norm": 0.9533546566963196,
"learning_rate": 3.410624356048331e-11,
"loss": 0.6237,
"step": 15766
},
{
"epoch": 1.0,
"grad_norm": 0.8995264768600464,
"learning_rate": 3.0421930862778446e-11,
"loss": 0.5904,
"step": 15767
},
{
"epoch": 1.0,
"grad_norm": 0.8825034499168396,
"learning_rate": 2.6948149490224618e-11,
"loss": 0.5683,
"step": 15768
},
{
"epoch": 1.0,
"grad_norm": 0.8862566351890564,
"learning_rate": 2.3684899589371256e-11,
"loss": 0.5178,
"step": 15769
},
{
"epoch": 1.0,
"grad_norm": 0.9058730006217957,
"learning_rate": 2.063218129733091e-11,
"loss": 0.5956,
"step": 15770
},
{
"epoch": 1.0,
"grad_norm": 0.9475806355476379,
"learning_rate": 1.7789994742889448e-11,
"loss": 0.6292,
"step": 15771
},
{
"epoch": 1.0,
"grad_norm": 0.8897664546966553,
"learning_rate": 1.5158340045395847e-11,
"loss": 0.5825,
"step": 15772
},
{
"epoch": 1.0,
"grad_norm": 0.8877468705177307,
"learning_rate": 1.2737217315872407e-11,
"loss": 0.572,
"step": 15773
},
{
"epoch": 1.0,
"grad_norm": 0.9111242294311523,
"learning_rate": 1.0526626656459648e-11,
"loss": 0.557,
"step": 15774
},
{
"epoch": 1.0,
"grad_norm": 0.8383316397666931,
"learning_rate": 8.526568160416304e-12,
"loss": 0.5977,
"step": 15775
},
{
"epoch": 1.0,
"grad_norm": 0.9318212270736694,
"learning_rate": 6.737041911564213e-12,
"loss": 0.608,
"step": 15776
},
{
"epoch": 1.0,
"grad_norm": 0.901352047920227,
"learning_rate": 5.158047984843428e-12,
"loss": 0.5461,
"step": 15777
},
{
"epoch": 1.0,
"grad_norm": 0.8835100531578064,
"learning_rate": 3.789586447422444e-12,
"loss": 0.5323,
"step": 15778
},
{
"epoch": 1.0,
"grad_norm": 0.8877159953117371,
"learning_rate": 2.631657356477746e-12,
"loss": 0.5907,
"step": 15779
},
{
"epoch": 1.0,
"grad_norm": 0.8495984077453613,
"learning_rate": 1.6842607614142582e-12,
"loss": 0.5492,
"step": 15780
},
{
"epoch": 1.0,
"grad_norm": 0.9034891724586487,
"learning_rate": 9.473967016448982e-13,
"loss": 0.6355,
"step": 15781
},
{
"epoch": 1.0,
"grad_norm": 0.9179747104644775,
"learning_rate": 4.210652082559108e-13,
"loss": 0.5649,
"step": 15782
},
{
"epoch": 1.0,
"grad_norm": 0.877388060092926,
"learning_rate": 1.0526630289664496e-13,
"loss": 0.5585,
"step": 15783
},
{
"epoch": 1.0,
"grad_norm": 0.8421609997749329,
"learning_rate": 0.0,
"loss": 0.5535,
"step": 15784
},
{
"epoch": 1.0,
"step": 15784,
"total_flos": 8.263869225865576e+18,
"train_loss": 0.6036004031589656,
"train_runtime": 161757.4173,
"train_samples_per_second": 24.981,
"train_steps_per_second": 0.098
}
],
"logging_steps": 1.0,
"max_steps": 15784,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 8000,
"total_flos": 8.263869225865576e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}