{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999461468038128,
"eval_steps": 500,
"global_step": 4642,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.3680746555328369,
"learning_rate": 7.142857142857144e-08,
"loss": 0.6038,
"step": 1
},
{
"epoch": 0.0,
"grad_norm": 0.5339280366897583,
"learning_rate": 1.4285714285714287e-07,
"loss": 0.6378,
"step": 2
},
{
"epoch": 0.0,
"grad_norm": 0.68046635389328,
"learning_rate": 2.142857142857143e-07,
"loss": 0.6492,
"step": 3
},
{
"epoch": 0.0,
"grad_norm": 0.4668383002281189,
"learning_rate": 2.8571428571428575e-07,
"loss": 0.5478,
"step": 4
},
{
"epoch": 0.0,
"grad_norm": 0.7353125810623169,
"learning_rate": 3.5714285714285716e-07,
"loss": 0.5802,
"step": 5
},
{
"epoch": 0.0,
"grad_norm": 0.4221579134464264,
"learning_rate": 4.285714285714286e-07,
"loss": 0.5863,
"step": 6
},
{
"epoch": 0.0,
"grad_norm": 0.35981830954551697,
"learning_rate": 5.000000000000001e-07,
"loss": 0.5881,
"step": 7
},
{
"epoch": 0.0,
"grad_norm": 0.49171018600463867,
"learning_rate": 5.714285714285715e-07,
"loss": 0.6196,
"step": 8
},
{
"epoch": 0.0,
"grad_norm": 0.26764529943466187,
"learning_rate": 6.428571428571428e-07,
"loss": 0.6026,
"step": 9
},
{
"epoch": 0.0,
"grad_norm": 0.31956347823143005,
"learning_rate": 7.142857142857143e-07,
"loss": 0.6077,
"step": 10
},
{
"epoch": 0.0,
"grad_norm": 0.46464401483535767,
"learning_rate": 7.857142857142857e-07,
"loss": 0.6144,
"step": 11
},
{
"epoch": 0.0,
"grad_norm": 0.41506412625312805,
"learning_rate": 8.571428571428572e-07,
"loss": 0.5684,
"step": 12
},
{
"epoch": 0.0,
"grad_norm": 0.448373407125473,
"learning_rate": 9.285714285714287e-07,
"loss": 0.5785,
"step": 13
},
{
"epoch": 0.0,
"grad_norm": 0.3355347514152527,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.5688,
"step": 14
},
{
"epoch": 0.0,
"grad_norm": 0.4548545181751251,
"learning_rate": 1.0714285714285714e-06,
"loss": 0.574,
"step": 15
},
{
"epoch": 0.0,
"grad_norm": 0.4471697211265564,
"learning_rate": 1.142857142857143e-06,
"loss": 0.5526,
"step": 16
},
{
"epoch": 0.0,
"grad_norm": 0.5379983186721802,
"learning_rate": 1.2142857142857144e-06,
"loss": 0.5503,
"step": 17
},
{
"epoch": 0.0,
"grad_norm": 0.3886550962924957,
"learning_rate": 1.2857142857142856e-06,
"loss": 0.6074,
"step": 18
},
{
"epoch": 0.0,
"grad_norm": 0.4560534656047821,
"learning_rate": 1.3571428571428572e-06,
"loss": 0.6262,
"step": 19
},
{
"epoch": 0.0,
"grad_norm": 0.3122997283935547,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.6103,
"step": 20
},
{
"epoch": 0.0,
"grad_norm": 0.35939767956733704,
"learning_rate": 1.5e-06,
"loss": 0.6164,
"step": 21
},
{
"epoch": 0.0,
"grad_norm": 0.3751821517944336,
"learning_rate": 1.5714285714285714e-06,
"loss": 0.5534,
"step": 22
},
{
"epoch": 0.0,
"grad_norm": 0.5395365953445435,
"learning_rate": 1.642857142857143e-06,
"loss": 0.5911,
"step": 23
},
{
"epoch": 0.01,
"grad_norm": 0.46072208881378174,
"learning_rate": 1.7142857142857145e-06,
"loss": 0.5986,
"step": 24
},
{
"epoch": 0.01,
"grad_norm": 0.35585564374923706,
"learning_rate": 1.7857142857142859e-06,
"loss": 0.5726,
"step": 25
},
{
"epoch": 0.01,
"grad_norm": 0.5622196197509766,
"learning_rate": 1.8571428571428573e-06,
"loss": 0.6092,
"step": 26
},
{
"epoch": 0.01,
"grad_norm": 0.4780106246471405,
"learning_rate": 1.928571428571429e-06,
"loss": 0.6318,
"step": 27
},
{
"epoch": 0.01,
"grad_norm": 0.4055005609989166,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.5731,
"step": 28
},
{
"epoch": 0.01,
"grad_norm": 0.29330089688301086,
"learning_rate": 2.0714285714285717e-06,
"loss": 0.577,
"step": 29
},
{
"epoch": 0.01,
"grad_norm": 0.4011281132698059,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.5771,
"step": 30
},
{
"epoch": 0.01,
"grad_norm": 0.5358087420463562,
"learning_rate": 2.2142857142857146e-06,
"loss": 0.5324,
"step": 31
},
{
"epoch": 0.01,
"grad_norm": 0.39781442284584045,
"learning_rate": 2.285714285714286e-06,
"loss": 0.5668,
"step": 32
},
{
"epoch": 0.01,
"grad_norm": 0.44512811303138733,
"learning_rate": 2.3571428571428574e-06,
"loss": 0.6232,
"step": 33
},
{
"epoch": 0.01,
"grad_norm": 0.3510986566543579,
"learning_rate": 2.428571428571429e-06,
"loss": 0.5816,
"step": 34
},
{
"epoch": 0.01,
"grad_norm": 0.39098355174064636,
"learning_rate": 2.5e-06,
"loss": 0.5486,
"step": 35
},
{
"epoch": 0.01,
"grad_norm": 0.4460495412349701,
"learning_rate": 2.571428571428571e-06,
"loss": 0.6256,
"step": 36
},
{
"epoch": 0.01,
"grad_norm": 0.5601059794425964,
"learning_rate": 2.642857142857143e-06,
"loss": 0.5408,
"step": 37
},
{
"epoch": 0.01,
"grad_norm": 0.543770432472229,
"learning_rate": 2.7142857142857144e-06,
"loss": 0.5843,
"step": 38
},
{
"epoch": 0.01,
"grad_norm": 0.45234617590904236,
"learning_rate": 2.785714285714286e-06,
"loss": 0.6491,
"step": 39
},
{
"epoch": 0.01,
"grad_norm": 0.35524260997772217,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.5765,
"step": 40
},
{
"epoch": 0.01,
"grad_norm": 0.3411543071269989,
"learning_rate": 2.928571428571429e-06,
"loss": 0.5184,
"step": 41
},
{
"epoch": 0.01,
"grad_norm": 0.2239224910736084,
"learning_rate": 3e-06,
"loss": 0.5921,
"step": 42
},
{
"epoch": 0.01,
"grad_norm": 0.4779617190361023,
"learning_rate": 3.071428571428572e-06,
"loss": 0.5981,
"step": 43
},
{
"epoch": 0.01,
"grad_norm": 0.43023017048835754,
"learning_rate": 3.142857142857143e-06,
"loss": 0.5822,
"step": 44
},
{
"epoch": 0.01,
"grad_norm": 0.5614896416664124,
"learning_rate": 3.2142857142857147e-06,
"loss": 0.5585,
"step": 45
},
{
"epoch": 0.01,
"grad_norm": 0.35685351490974426,
"learning_rate": 3.285714285714286e-06,
"loss": 0.5623,
"step": 46
},
{
"epoch": 0.01,
"grad_norm": 0.2451944649219513,
"learning_rate": 3.357142857142857e-06,
"loss": 0.4998,
"step": 47
},
{
"epoch": 0.01,
"grad_norm": 0.24035461246967316,
"learning_rate": 3.428571428571429e-06,
"loss": 0.5864,
"step": 48
},
{
"epoch": 0.01,
"grad_norm": 0.29300373792648315,
"learning_rate": 3.5e-06,
"loss": 0.5861,
"step": 49
},
{
"epoch": 0.01,
"grad_norm": 0.3160554766654968,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.5769,
"step": 50
},
{
"epoch": 0.01,
"grad_norm": 0.20753253996372223,
"learning_rate": 3.642857142857143e-06,
"loss": 0.5592,
"step": 51
},
{
"epoch": 0.01,
"grad_norm": 0.29364365339279175,
"learning_rate": 3.7142857142857146e-06,
"loss": 0.5855,
"step": 52
},
{
"epoch": 0.01,
"grad_norm": 0.25966310501098633,
"learning_rate": 3.785714285714286e-06,
"loss": 0.5714,
"step": 53
},
{
"epoch": 0.01,
"grad_norm": 0.5970475673675537,
"learning_rate": 3.857142857142858e-06,
"loss": 0.5955,
"step": 54
},
{
"epoch": 0.01,
"grad_norm": 0.2742187976837158,
"learning_rate": 3.928571428571429e-06,
"loss": 0.5982,
"step": 55
},
{
"epoch": 0.01,
"grad_norm": 0.21851637959480286,
"learning_rate": 4.000000000000001e-06,
"loss": 0.5086,
"step": 56
},
{
"epoch": 0.01,
"grad_norm": 0.31623247265815735,
"learning_rate": 4.071428571428572e-06,
"loss": 0.5507,
"step": 57
},
{
"epoch": 0.01,
"grad_norm": 0.21017701923847198,
"learning_rate": 4.1428571428571435e-06,
"loss": 0.5681,
"step": 58
},
{
"epoch": 0.01,
"grad_norm": 0.23857641220092773,
"learning_rate": 4.2142857142857145e-06,
"loss": 0.5873,
"step": 59
},
{
"epoch": 0.01,
"grad_norm": 0.330123633146286,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.5507,
"step": 60
},
{
"epoch": 0.01,
"grad_norm": 0.2590518295764923,
"learning_rate": 4.357142857142857e-06,
"loss": 0.5563,
"step": 61
},
{
"epoch": 0.01,
"grad_norm": 0.22396299242973328,
"learning_rate": 4.428571428571429e-06,
"loss": 0.5208,
"step": 62
},
{
"epoch": 0.01,
"grad_norm": 0.20799732208251953,
"learning_rate": 4.5e-06,
"loss": 0.5636,
"step": 63
},
{
"epoch": 0.01,
"grad_norm": 0.28667014837265015,
"learning_rate": 4.571428571428572e-06,
"loss": 0.5126,
"step": 64
},
{
"epoch": 0.01,
"grad_norm": 0.3183256983757019,
"learning_rate": 4.642857142857144e-06,
"loss": 0.4917,
"step": 65
},
{
"epoch": 0.01,
"grad_norm": 0.34055784344673157,
"learning_rate": 4.714285714285715e-06,
"loss": 0.5869,
"step": 66
},
{
"epoch": 0.01,
"grad_norm": 0.18996137380599976,
"learning_rate": 4.785714285714287e-06,
"loss": 0.5898,
"step": 67
},
{
"epoch": 0.01,
"grad_norm": 0.19642199575901031,
"learning_rate": 4.857142857142858e-06,
"loss": 0.5024,
"step": 68
},
{
"epoch": 0.01,
"grad_norm": 0.2719160318374634,
"learning_rate": 4.928571428571429e-06,
"loss": 0.5239,
"step": 69
},
{
"epoch": 0.02,
"grad_norm": 0.2251090109348297,
"learning_rate": 5e-06,
"loss": 0.5327,
"step": 70
},
{
"epoch": 0.02,
"grad_norm": 0.15708671510219574,
"learning_rate": 5.071428571428571e-06,
"loss": 0.5446,
"step": 71
},
{
"epoch": 0.02,
"grad_norm": 0.2416897416114807,
"learning_rate": 5.142857142857142e-06,
"loss": 0.5972,
"step": 72
},
{
"epoch": 0.02,
"grad_norm": 0.30218374729156494,
"learning_rate": 5.214285714285715e-06,
"loss": 0.5723,
"step": 73
},
{
"epoch": 0.02,
"grad_norm": 0.23136214911937714,
"learning_rate": 5.285714285714286e-06,
"loss": 0.5919,
"step": 74
},
{
"epoch": 0.02,
"grad_norm": 0.29520007967948914,
"learning_rate": 5.357142857142857e-06,
"loss": 0.5443,
"step": 75
},
{
"epoch": 0.02,
"grad_norm": 0.2675969898700714,
"learning_rate": 5.428571428571429e-06,
"loss": 0.6078,
"step": 76
},
{
"epoch": 0.02,
"grad_norm": 0.21040533483028412,
"learning_rate": 5.500000000000001e-06,
"loss": 0.5131,
"step": 77
},
{
"epoch": 0.02,
"grad_norm": 0.21507872641086578,
"learning_rate": 5.571428571428572e-06,
"loss": 0.5537,
"step": 78
},
{
"epoch": 0.02,
"grad_norm": 0.3713940680027008,
"learning_rate": 5.6428571428571435e-06,
"loss": 0.5677,
"step": 79
},
{
"epoch": 0.02,
"grad_norm": 0.2338705062866211,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.5583,
"step": 80
},
{
"epoch": 0.02,
"grad_norm": 0.24568618834018707,
"learning_rate": 5.785714285714286e-06,
"loss": 0.591,
"step": 81
},
{
"epoch": 0.02,
"grad_norm": 0.2607351541519165,
"learning_rate": 5.857142857142858e-06,
"loss": 0.5854,
"step": 82
},
{
"epoch": 0.02,
"grad_norm": 0.25233450531959534,
"learning_rate": 5.928571428571429e-06,
"loss": 0.5435,
"step": 83
},
{
"epoch": 0.02,
"grad_norm": 0.1901504397392273,
"learning_rate": 6e-06,
"loss": 0.5189,
"step": 84
},
{
"epoch": 0.02,
"grad_norm": 0.20455455780029297,
"learning_rate": 6.071428571428571e-06,
"loss": 0.5372,
"step": 85
},
{
"epoch": 0.02,
"grad_norm": 0.2110891193151474,
"learning_rate": 6.142857142857144e-06,
"loss": 0.5386,
"step": 86
},
{
"epoch": 0.02,
"grad_norm": 0.18980112671852112,
"learning_rate": 6.214285714285715e-06,
"loss": 0.5834,
"step": 87
},
{
"epoch": 0.02,
"grad_norm": 0.24649843573570251,
"learning_rate": 6.285714285714286e-06,
"loss": 0.5769,
"step": 88
},
{
"epoch": 0.02,
"grad_norm": 0.20015066862106323,
"learning_rate": 6.357142857142858e-06,
"loss": 0.5213,
"step": 89
},
{
"epoch": 0.02,
"grad_norm": 0.23394432663917542,
"learning_rate": 6.4285714285714295e-06,
"loss": 0.5351,
"step": 90
},
{
"epoch": 0.02,
"grad_norm": 0.23542846739292145,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.5527,
"step": 91
},
{
"epoch": 0.02,
"grad_norm": 0.2840578258037567,
"learning_rate": 6.571428571428572e-06,
"loss": 0.5917,
"step": 92
},
{
"epoch": 0.02,
"grad_norm": 0.5794204473495483,
"learning_rate": 6.642857142857143e-06,
"loss": 0.5693,
"step": 93
},
{
"epoch": 0.02,
"grad_norm": 0.2579974830150604,
"learning_rate": 6.714285714285714e-06,
"loss": 0.577,
"step": 94
},
{
"epoch": 0.02,
"grad_norm": 0.23646292090415955,
"learning_rate": 6.785714285714287e-06,
"loss": 0.5327,
"step": 95
},
{
"epoch": 0.02,
"grad_norm": 0.3467201888561249,
"learning_rate": 6.857142857142858e-06,
"loss": 0.5163,
"step": 96
},
{
"epoch": 0.02,
"grad_norm": 0.1848195195198059,
"learning_rate": 6.928571428571429e-06,
"loss": 0.5614,
"step": 97
},
{
"epoch": 0.02,
"grad_norm": 0.20233601331710815,
"learning_rate": 7e-06,
"loss": 0.5993,
"step": 98
},
{
"epoch": 0.02,
"grad_norm": 0.2592422366142273,
"learning_rate": 7.0714285714285726e-06,
"loss": 0.5568,
"step": 99
},
{
"epoch": 0.02,
"grad_norm": 0.19915878772735596,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.5331,
"step": 100
},
{
"epoch": 0.02,
"grad_norm": 0.24805685877799988,
"learning_rate": 7.2142857142857145e-06,
"loss": 0.5241,
"step": 101
},
{
"epoch": 0.02,
"grad_norm": 0.2214994579553604,
"learning_rate": 7.285714285714286e-06,
"loss": 0.5219,
"step": 102
},
{
"epoch": 0.02,
"grad_norm": 0.2977463901042938,
"learning_rate": 7.357142857142858e-06,
"loss": 0.5836,
"step": 103
},
{
"epoch": 0.02,
"grad_norm": 0.23825155198574066,
"learning_rate": 7.428571428571429e-06,
"loss": 0.5452,
"step": 104
},
{
"epoch": 0.02,
"grad_norm": 0.22910349071025848,
"learning_rate": 7.500000000000001e-06,
"loss": 0.5525,
"step": 105
},
{
"epoch": 0.02,
"grad_norm": 0.24861909449100494,
"learning_rate": 7.571428571428572e-06,
"loss": 0.5602,
"step": 106
},
{
"epoch": 0.02,
"grad_norm": 0.220360666513443,
"learning_rate": 7.642857142857143e-06,
"loss": 0.4968,
"step": 107
},
{
"epoch": 0.02,
"grad_norm": 0.29663270711898804,
"learning_rate": 7.714285714285716e-06,
"loss": 0.547,
"step": 108
},
{
"epoch": 0.02,
"grad_norm": 0.15902388095855713,
"learning_rate": 7.785714285714287e-06,
"loss": 0.5593,
"step": 109
},
{
"epoch": 0.02,
"grad_norm": 0.25408726930618286,
"learning_rate": 7.857142857142858e-06,
"loss": 0.5129,
"step": 110
},
{
"epoch": 0.02,
"grad_norm": 0.25450989603996277,
"learning_rate": 7.928571428571429e-06,
"loss": 0.5568,
"step": 111
},
{
"epoch": 0.02,
"grad_norm": 0.2113712877035141,
"learning_rate": 8.000000000000001e-06,
"loss": 0.5311,
"step": 112
},
{
"epoch": 0.02,
"grad_norm": 0.2673487663269043,
"learning_rate": 8.071428571428572e-06,
"loss": 0.5063,
"step": 113
},
{
"epoch": 0.02,
"grad_norm": 0.17971846461296082,
"learning_rate": 8.142857142857143e-06,
"loss": 0.4959,
"step": 114
},
{
"epoch": 0.02,
"grad_norm": 0.27486327290534973,
"learning_rate": 8.214285714285714e-06,
"loss": 0.504,
"step": 115
},
{
"epoch": 0.02,
"grad_norm": 0.3731400966644287,
"learning_rate": 8.285714285714287e-06,
"loss": 0.5262,
"step": 116
},
{
"epoch": 0.03,
"grad_norm": 0.1998678743839264,
"learning_rate": 8.357142857142858e-06,
"loss": 0.6066,
"step": 117
},
{
"epoch": 0.03,
"grad_norm": 0.18095743656158447,
"learning_rate": 8.428571428571429e-06,
"loss": 0.581,
"step": 118
},
{
"epoch": 0.03,
"grad_norm": 0.20576633512973785,
"learning_rate": 8.5e-06,
"loss": 0.5646,
"step": 119
},
{
"epoch": 0.03,
"grad_norm": 0.21952424943447113,
"learning_rate": 8.571428571428571e-06,
"loss": 0.5274,
"step": 120
},
{
"epoch": 0.03,
"grad_norm": 0.22617046535015106,
"learning_rate": 8.642857142857144e-06,
"loss": 0.4918,
"step": 121
},
{
"epoch": 0.03,
"grad_norm": 0.22353151440620422,
"learning_rate": 8.714285714285715e-06,
"loss": 0.5383,
"step": 122
},
{
"epoch": 0.03,
"grad_norm": 0.24257732927799225,
"learning_rate": 8.785714285714286e-06,
"loss": 0.4734,
"step": 123
},
{
"epoch": 0.03,
"grad_norm": 0.16320379078388214,
"learning_rate": 8.857142857142858e-06,
"loss": 0.5033,
"step": 124
},
{
"epoch": 0.03,
"grad_norm": 0.21186141669750214,
"learning_rate": 8.92857142857143e-06,
"loss": 0.5116,
"step": 125
},
{
"epoch": 0.03,
"grad_norm": 0.1727321892976761,
"learning_rate": 9e-06,
"loss": 0.4887,
"step": 126
},
{
"epoch": 0.03,
"grad_norm": 0.17333361506462097,
"learning_rate": 9.071428571428573e-06,
"loss": 0.5629,
"step": 127
},
{
"epoch": 0.03,
"grad_norm": 0.20159348845481873,
"learning_rate": 9.142857142857144e-06,
"loss": 0.5855,
"step": 128
},
{
"epoch": 0.03,
"grad_norm": 0.25432631373405457,
"learning_rate": 9.214285714285715e-06,
"loss": 0.5565,
"step": 129
},
{
"epoch": 0.03,
"grad_norm": 0.18436311185359955,
"learning_rate": 9.285714285714288e-06,
"loss": 0.4746,
"step": 130
},
{
"epoch": 0.03,
"grad_norm": 0.22167499363422394,
"learning_rate": 9.357142857142859e-06,
"loss": 0.5437,
"step": 131
},
{
"epoch": 0.03,
"grad_norm": 0.29192057251930237,
"learning_rate": 9.42857142857143e-06,
"loss": 0.5443,
"step": 132
},
{
"epoch": 0.03,
"grad_norm": 0.1628040224313736,
"learning_rate": 9.5e-06,
"loss": 0.5563,
"step": 133
},
{
"epoch": 0.03,
"grad_norm": 0.32334551215171814,
"learning_rate": 9.571428571428573e-06,
"loss": 0.575,
"step": 134
},
{
"epoch": 0.03,
"grad_norm": 0.272955983877182,
"learning_rate": 9.642857142857144e-06,
"loss": 0.5363,
"step": 135
},
{
"epoch": 0.03,
"grad_norm": 0.2314363420009613,
"learning_rate": 9.714285714285715e-06,
"loss": 0.5356,
"step": 136
},
{
"epoch": 0.03,
"grad_norm": 0.18768808245658875,
"learning_rate": 9.785714285714286e-06,
"loss": 0.5053,
"step": 137
},
{
"epoch": 0.03,
"grad_norm": 0.22900734841823578,
"learning_rate": 9.857142857142859e-06,
"loss": 0.5594,
"step": 138
},
{
"epoch": 0.03,
"grad_norm": 0.1723155379295349,
"learning_rate": 9.92857142857143e-06,
"loss": 0.5104,
"step": 139
},
{
"epoch": 0.03,
"grad_norm": 0.2596263885498047,
"learning_rate": 1e-05,
"loss": 0.5271,
"step": 140
},
{
"epoch": 0.03,
"grad_norm": 0.15986420214176178,
"learning_rate": 9.999998782612734e-06,
"loss": 0.5382,
"step": 141
},
{
"epoch": 0.03,
"grad_norm": 0.33412984013557434,
"learning_rate": 9.999995130451526e-06,
"loss": 0.4807,
"step": 142
},
{
"epoch": 0.03,
"grad_norm": 0.20340685546398163,
"learning_rate": 9.999989043518153e-06,
"loss": 0.519,
"step": 143
},
{
"epoch": 0.03,
"grad_norm": 0.18798081576824188,
"learning_rate": 9.999980521815582e-06,
"loss": 0.5347,
"step": 144
},
{
"epoch": 0.03,
"grad_norm": 0.20350557565689087,
"learning_rate": 9.99996956534796e-06,
"loss": 0.4913,
"step": 145
},
{
"epoch": 0.03,
"grad_norm": 0.2547079026699066,
"learning_rate": 9.999956174120626e-06,
"loss": 0.5284,
"step": 146
},
{
"epoch": 0.03,
"grad_norm": 0.26818037033081055,
"learning_rate": 9.999940348140098e-06,
"loss": 0.5597,
"step": 147
},
{
"epoch": 0.03,
"grad_norm": 0.1871444284915924,
"learning_rate": 9.999922087414084e-06,
"loss": 0.4857,
"step": 148
},
{
"epoch": 0.03,
"grad_norm": 0.24267414212226868,
"learning_rate": 9.999901391951474e-06,
"loss": 0.5243,
"step": 149
},
{
"epoch": 0.03,
"grad_norm": 0.22753533720970154,
"learning_rate": 9.99987826176235e-06,
"loss": 0.4868,
"step": 150
},
{
"epoch": 0.03,
"grad_norm": 0.17949774861335754,
"learning_rate": 9.99985269685797e-06,
"loss": 0.5477,
"step": 151
},
{
"epoch": 0.03,
"grad_norm": 0.24117450416088104,
"learning_rate": 9.999824697250786e-06,
"loss": 0.5583,
"step": 152
},
{
"epoch": 0.03,
"grad_norm": 0.2758869230747223,
"learning_rate": 9.999794262954432e-06,
"loss": 0.6049,
"step": 153
},
{
"epoch": 0.03,
"grad_norm": 0.22826828062534332,
"learning_rate": 9.999761393983728e-06,
"loss": 0.5437,
"step": 154
},
{
"epoch": 0.03,
"grad_norm": 0.24656014144420624,
"learning_rate": 9.999726090354683e-06,
"loss": 0.5417,
"step": 155
},
{
"epoch": 0.03,
"grad_norm": 0.1714806854724884,
"learning_rate": 9.999688352084482e-06,
"loss": 0.5189,
"step": 156
},
{
"epoch": 0.03,
"grad_norm": 0.17295528948307037,
"learning_rate": 9.999648179191505e-06,
"loss": 0.5478,
"step": 157
},
{
"epoch": 0.03,
"grad_norm": 0.19324244558811188,
"learning_rate": 9.999605571695317e-06,
"loss": 0.5664,
"step": 158
},
{
"epoch": 0.03,
"grad_norm": 0.20624053478240967,
"learning_rate": 9.999560529616661e-06,
"loss": 0.5087,
"step": 159
},
{
"epoch": 0.03,
"grad_norm": 0.26294004917144775,
"learning_rate": 9.999513052977473e-06,
"loss": 0.6106,
"step": 160
},
{
"epoch": 0.03,
"grad_norm": 0.19021935760974884,
"learning_rate": 9.999463141800873e-06,
"loss": 0.4975,
"step": 161
},
{
"epoch": 0.03,
"grad_norm": 0.17529787123203278,
"learning_rate": 9.999410796111163e-06,
"loss": 0.5204,
"step": 162
},
{
"epoch": 0.04,
"grad_norm": 0.19823302328586578,
"learning_rate": 9.999356015933834e-06,
"loss": 0.5312,
"step": 163
},
{
"epoch": 0.04,
"grad_norm": 0.2933864891529083,
"learning_rate": 9.999298801295564e-06,
"loss": 0.5123,
"step": 164
},
{
"epoch": 0.04,
"grad_norm": 0.19900120794773102,
"learning_rate": 9.99923915222421e-06,
"loss": 0.5824,
"step": 165
},
{
"epoch": 0.04,
"grad_norm": 0.18184617161750793,
"learning_rate": 9.99917706874882e-06,
"loss": 0.5623,
"step": 166
},
{
"epoch": 0.04,
"grad_norm": 0.1705755591392517,
"learning_rate": 9.999112550899627e-06,
"loss": 0.5458,
"step": 167
},
{
"epoch": 0.04,
"grad_norm": 0.3029863238334656,
"learning_rate": 9.999045598708047e-06,
"loss": 0.54,
"step": 168
},
{
"epoch": 0.04,
"grad_norm": 0.1956048458814621,
"learning_rate": 9.998976212206683e-06,
"loss": 0.5387,
"step": 169
},
{
"epoch": 0.04,
"grad_norm": 0.1850360929965973,
"learning_rate": 9.998904391429323e-06,
"loss": 0.5085,
"step": 170
},
{
"epoch": 0.04,
"grad_norm": 0.21800455451011658,
"learning_rate": 9.99883013641094e-06,
"loss": 0.5358,
"step": 171
},
{
"epoch": 0.04,
"grad_norm": 0.2191917896270752,
"learning_rate": 9.998753447187693e-06,
"loss": 0.5668,
"step": 172
},
{
"epoch": 0.04,
"grad_norm": 0.22353680431842804,
"learning_rate": 9.998674323796928e-06,
"loss": 0.5358,
"step": 173
},
{
"epoch": 0.04,
"grad_norm": 0.2535366117954254,
"learning_rate": 9.998592766277173e-06,
"loss": 0.5041,
"step": 174
},
{
"epoch": 0.04,
"grad_norm": 0.20851938426494598,
"learning_rate": 9.998508774668142e-06,
"loss": 0.4944,
"step": 175
},
{
"epoch": 0.04,
"grad_norm": 0.1755622774362564,
"learning_rate": 9.998422349010736e-06,
"loss": 0.5156,
"step": 176
},
{
"epoch": 0.04,
"grad_norm": 0.15284371376037598,
"learning_rate": 9.998333489347042e-06,
"loss": 0.5233,
"step": 177
},
{
"epoch": 0.04,
"grad_norm": 0.2053551822900772,
"learning_rate": 9.998242195720327e-06,
"loss": 0.5414,
"step": 178
},
{
"epoch": 0.04,
"grad_norm": 0.18832677602767944,
"learning_rate": 9.99814846817505e-06,
"loss": 0.5724,
"step": 179
},
{
"epoch": 0.04,
"grad_norm": 0.19767887890338898,
"learning_rate": 9.998052306756852e-06,
"loss": 0.5258,
"step": 180
},
{
"epoch": 0.04,
"grad_norm": 0.17682293057441711,
"learning_rate": 9.997953711512556e-06,
"loss": 0.5718,
"step": 181
},
{
"epoch": 0.04,
"grad_norm": 0.23476260900497437,
"learning_rate": 9.997852682490179e-06,
"loss": 0.5566,
"step": 182
},
{
"epoch": 0.04,
"grad_norm": 0.18441180884838104,
"learning_rate": 9.997749219738912e-06,
"loss": 0.583,
"step": 183
},
{
"epoch": 0.04,
"grad_norm": 0.2411859780550003,
"learning_rate": 9.997643323309139e-06,
"loss": 0.4423,
"step": 184
},
{
"epoch": 0.04,
"grad_norm": 0.1665976196527481,
"learning_rate": 9.997534993252427e-06,
"loss": 0.5353,
"step": 185
},
{
"epoch": 0.04,
"grad_norm": 0.1685672253370285,
"learning_rate": 9.997424229621529e-06,
"loss": 0.5073,
"step": 186
},
{
"epoch": 0.04,
"grad_norm": 0.277639776468277,
"learning_rate": 9.99731103247038e-06,
"loss": 0.4813,
"step": 187
},
{
"epoch": 0.04,
"grad_norm": 0.17908422648906708,
"learning_rate": 9.997195401854102e-06,
"loss": 0.5088,
"step": 188
},
{
"epoch": 0.04,
"grad_norm": 0.1873718649148941,
"learning_rate": 9.997077337829003e-06,
"loss": 0.5072,
"step": 189
},
{
"epoch": 0.04,
"grad_norm": 0.256670743227005,
"learning_rate": 9.996956840452573e-06,
"loss": 0.4865,
"step": 190
},
{
"epoch": 0.04,
"grad_norm": 0.27443787455558777,
"learning_rate": 9.996833909783492e-06,
"loss": 0.5466,
"step": 191
},
{
"epoch": 0.04,
"grad_norm": 0.19919687509536743,
"learning_rate": 9.996708545881617e-06,
"loss": 0.5387,
"step": 192
},
{
"epoch": 0.04,
"grad_norm": 0.16513916850090027,
"learning_rate": 9.996580748808e-06,
"loss": 0.5223,
"step": 193
},
{
"epoch": 0.04,
"grad_norm": 0.20502988994121552,
"learning_rate": 9.996450518624868e-06,
"loss": 0.5194,
"step": 194
},
{
"epoch": 0.04,
"grad_norm": 0.18695437908172607,
"learning_rate": 9.99631785539564e-06,
"loss": 0.4778,
"step": 195
},
{
"epoch": 0.04,
"grad_norm": 0.16061006486415863,
"learning_rate": 9.996182759184916e-06,
"loss": 0.5192,
"step": 196
},
{
"epoch": 0.04,
"grad_norm": 0.18725766241550446,
"learning_rate": 9.99604523005848e-06,
"loss": 0.5199,
"step": 197
},
{
"epoch": 0.04,
"grad_norm": 0.1948050707578659,
"learning_rate": 9.995905268083306e-06,
"loss": 0.5511,
"step": 198
},
{
"epoch": 0.04,
"grad_norm": 0.1752336025238037,
"learning_rate": 9.995762873327548e-06,
"loss": 0.5705,
"step": 199
},
{
"epoch": 0.04,
"grad_norm": 0.2874692678451538,
"learning_rate": 9.995618045860545e-06,
"loss": 0.5504,
"step": 200
},
{
"epoch": 0.04,
"grad_norm": 0.18488091230392456,
"learning_rate": 9.99547078575282e-06,
"loss": 0.5219,
"step": 201
},
{
"epoch": 0.04,
"grad_norm": 0.21118810772895813,
"learning_rate": 9.995321093076085e-06,
"loss": 0.6084,
"step": 202
},
{
"epoch": 0.04,
"grad_norm": 0.17937391996383667,
"learning_rate": 9.99516896790323e-06,
"loss": 0.4974,
"step": 203
},
{
"epoch": 0.04,
"grad_norm": 0.24880222976207733,
"learning_rate": 9.995014410308336e-06,
"loss": 0.5524,
"step": 204
},
{
"epoch": 0.04,
"grad_norm": 0.2270919531583786,
"learning_rate": 9.994857420366669e-06,
"loss": 0.5298,
"step": 205
},
{
"epoch": 0.04,
"grad_norm": 0.2064422369003296,
"learning_rate": 9.994697998154668e-06,
"loss": 0.5442,
"step": 206
},
{
"epoch": 0.04,
"grad_norm": 0.18932758271694183,
"learning_rate": 9.994536143749969e-06,
"loss": 0.4992,
"step": 207
},
{
"epoch": 0.04,
"grad_norm": 0.18627791106700897,
"learning_rate": 9.994371857231388e-06,
"loss": 0.5652,
"step": 208
},
{
"epoch": 0.05,
"grad_norm": 0.18181046843528748,
"learning_rate": 9.994205138678923e-06,
"loss": 0.4876,
"step": 209
},
{
"epoch": 0.05,
"grad_norm": 0.19523365795612335,
"learning_rate": 9.99403598817376e-06,
"loss": 0.5355,
"step": 210
},
{
"epoch": 0.05,
"grad_norm": 0.202137753367424,
"learning_rate": 9.993864405798268e-06,
"loss": 0.5474,
"step": 211
},
{
"epoch": 0.05,
"grad_norm": 0.1764814555644989,
"learning_rate": 9.993690391636e-06,
"loss": 0.511,
"step": 212
},
{
"epoch": 0.05,
"grad_norm": 0.36145585775375366,
"learning_rate": 9.99351394577169e-06,
"loss": 0.5303,
"step": 213
},
{
"epoch": 0.05,
"grad_norm": 0.16018763184547424,
"learning_rate": 9.993335068291264e-06,
"loss": 0.5363,
"step": 214
},
{
"epoch": 0.05,
"grad_norm": 0.14477033913135529,
"learning_rate": 9.993153759281824e-06,
"loss": 0.5394,
"step": 215
},
{
"epoch": 0.05,
"grad_norm": 0.20048737525939941,
"learning_rate": 9.99297001883166e-06,
"loss": 0.5348,
"step": 216
},
{
"epoch": 0.05,
"grad_norm": 0.1712445169687271,
"learning_rate": 9.992783847030246e-06,
"loss": 0.5438,
"step": 217
},
{
"epoch": 0.05,
"grad_norm": 0.15328474342823029,
"learning_rate": 9.992595243968238e-06,
"loss": 0.5454,
"step": 218
},
{
"epoch": 0.05,
"grad_norm": 0.22686372697353363,
"learning_rate": 9.992404209737476e-06,
"loss": 0.5648,
"step": 219
},
{
"epoch": 0.05,
"grad_norm": 0.21831001341342926,
"learning_rate": 9.99221074443099e-06,
"loss": 0.5224,
"step": 220
},
{
"epoch": 0.05,
"grad_norm": 0.15410234034061432,
"learning_rate": 9.992014848142984e-06,
"loss": 0.5185,
"step": 221
},
{
"epoch": 0.05,
"grad_norm": 0.20523761212825775,
"learning_rate": 9.991816520968853e-06,
"loss": 0.5687,
"step": 222
},
{
"epoch": 0.05,
"grad_norm": 0.15560153126716614,
"learning_rate": 9.991615763005172e-06,
"loss": 0.5229,
"step": 223
},
{
"epoch": 0.05,
"grad_norm": 0.19702470302581787,
"learning_rate": 9.991412574349704e-06,
"loss": 0.5337,
"step": 224
},
{
"epoch": 0.05,
"grad_norm": 0.24464666843414307,
"learning_rate": 9.991206955101388e-06,
"loss": 0.5367,
"step": 225
},
{
"epoch": 0.05,
"grad_norm": 0.1894879937171936,
"learning_rate": 9.990998905360357e-06,
"loss": 0.5228,
"step": 226
},
{
"epoch": 0.05,
"grad_norm": 0.14452479779720306,
"learning_rate": 9.990788425227915e-06,
"loss": 0.5354,
"step": 227
},
{
"epoch": 0.05,
"grad_norm": 0.23448392748832703,
"learning_rate": 9.990575514806563e-06,
"loss": 0.545,
"step": 228
},
{
"epoch": 0.05,
"grad_norm": 0.20030318200588226,
"learning_rate": 9.990360174199975e-06,
"loss": 0.5239,
"step": 229
},
{
"epoch": 0.05,
"grad_norm": 0.1632775366306305,
"learning_rate": 9.990142403513012e-06,
"loss": 0.5507,
"step": 230
},
{
"epoch": 0.05,
"grad_norm": 0.17613913118839264,
"learning_rate": 9.989922202851722e-06,
"loss": 0.5077,
"step": 231
},
{
"epoch": 0.05,
"grad_norm": 0.1816764920949936,
"learning_rate": 9.989699572323328e-06,
"loss": 0.5121,
"step": 232
},
{
"epoch": 0.05,
"grad_norm": 0.18507419526576996,
"learning_rate": 9.989474512036245e-06,
"loss": 0.5335,
"step": 233
},
{
"epoch": 0.05,
"grad_norm": 0.22486189007759094,
"learning_rate": 9.989247022100065e-06,
"loss": 0.5223,
"step": 234
},
{
"epoch": 0.05,
"grad_norm": 0.19390611350536346,
"learning_rate": 9.989017102625565e-06,
"loss": 0.564,
"step": 235
},
{
"epoch": 0.05,
"grad_norm": 0.22769489884376526,
"learning_rate": 9.988784753724707e-06,
"loss": 0.4891,
"step": 236
},
{
"epoch": 0.05,
"grad_norm": 0.18696601688861847,
"learning_rate": 9.988549975510635e-06,
"loss": 0.5424,
"step": 237
},
{
"epoch": 0.05,
"grad_norm": 0.1786351501941681,
"learning_rate": 9.988312768097673e-06,
"loss": 0.5279,
"step": 238
},
{
"epoch": 0.05,
"grad_norm": 0.19431112706661224,
"learning_rate": 9.988073131601332e-06,
"loss": 0.5463,
"step": 239
},
{
"epoch": 0.05,
"grad_norm": 0.171942800283432,
"learning_rate": 9.987831066138302e-06,
"loss": 0.5208,
"step": 240
},
{
"epoch": 0.05,
"grad_norm": 0.15704870223999023,
"learning_rate": 9.987586571826461e-06,
"loss": 0.5413,
"step": 241
},
{
"epoch": 0.05,
"grad_norm": 0.16401955485343933,
"learning_rate": 9.987339648784866e-06,
"loss": 0.562,
"step": 242
},
{
"epoch": 0.05,
"grad_norm": 0.2467910647392273,
"learning_rate": 9.987090297133756e-06,
"loss": 0.559,
"step": 243
},
{
"epoch": 0.05,
"grad_norm": 0.1753203123807907,
"learning_rate": 9.986838516994555e-06,
"loss": 0.6251,
"step": 244
},
{
"epoch": 0.05,
"grad_norm": 0.21456435322761536,
"learning_rate": 9.986584308489867e-06,
"loss": 0.5495,
"step": 245
},
{
"epoch": 0.05,
"grad_norm": 0.2137192189693451,
"learning_rate": 9.986327671743484e-06,
"loss": 0.5475,
"step": 246
},
{
"epoch": 0.05,
"grad_norm": 0.16317638754844666,
"learning_rate": 9.98606860688037e-06,
"loss": 0.565,
"step": 247
},
{
"epoch": 0.05,
"grad_norm": 0.15220917761325836,
"learning_rate": 9.985807114026684e-06,
"loss": 0.5185,
"step": 248
},
{
"epoch": 0.05,
"grad_norm": 0.2033926397562027,
"learning_rate": 9.98554319330976e-06,
"loss": 0.5255,
"step": 249
},
{
"epoch": 0.05,
"grad_norm": 0.17678335309028625,
"learning_rate": 9.985276844858114e-06,
"loss": 0.5371,
"step": 250
},
{
"epoch": 0.05,
"grad_norm": 0.1734929084777832,
"learning_rate": 9.985008068801446e-06,
"loss": 0.5148,
"step": 251
},
{
"epoch": 0.05,
"grad_norm": 0.19290420413017273,
"learning_rate": 9.984736865270637e-06,
"loss": 0.5077,
"step": 252
},
{
"epoch": 0.05,
"grad_norm": 0.19296742975711823,
"learning_rate": 9.984463234397752e-06,
"loss": 0.5376,
"step": 253
},
{
"epoch": 0.05,
"grad_norm": 0.21208150684833527,
"learning_rate": 9.984187176316038e-06,
"loss": 0.5431,
"step": 254
},
{
"epoch": 0.05,
"grad_norm": 0.16331081092357635,
"learning_rate": 9.983908691159921e-06,
"loss": 0.5494,
"step": 255
},
{
"epoch": 0.06,
"grad_norm": 0.2127610296010971,
"learning_rate": 9.983627779065012e-06,
"loss": 0.5196,
"step": 256
},
{
"epoch": 0.06,
"grad_norm": 0.27896058559417725,
"learning_rate": 9.983344440168101e-06,
"loss": 0.5004,
"step": 257
},
{
"epoch": 0.06,
"grad_norm": 0.2308092564344406,
"learning_rate": 9.983058674607164e-06,
"loss": 0.4996,
"step": 258
},
{
"epoch": 0.06,
"grad_norm": 0.1815384477376938,
"learning_rate": 9.982770482521353e-06,
"loss": 0.5484,
"step": 259
},
{
"epoch": 0.06,
"grad_norm": 0.19610373675823212,
"learning_rate": 9.982479864051005e-06,
"loss": 0.5465,
"step": 260
},
{
"epoch": 0.06,
"grad_norm": 0.21339653432369232,
"learning_rate": 9.982186819337639e-06,
"loss": 0.5318,
"step": 261
},
{
"epoch": 0.06,
"grad_norm": 0.16528427600860596,
"learning_rate": 9.981891348523955e-06,
"loss": 0.5164,
"step": 262
},
{
"epoch": 0.06,
"grad_norm": 0.19075849652290344,
"learning_rate": 9.981593451753833e-06,
"loss": 0.482,
"step": 263
},
{
"epoch": 0.06,
"grad_norm": 0.18467120826244354,
"learning_rate": 9.981293129172334e-06,
"loss": 0.4893,
"step": 264
},
{
"epoch": 0.06,
"grad_norm": 0.2131132185459137,
"learning_rate": 9.980990380925705e-06,
"loss": 0.5839,
"step": 265
},
{
"epoch": 0.06,
"grad_norm": 0.214164599776268,
"learning_rate": 9.980685207161368e-06,
"loss": 0.5351,
"step": 266
},
{
"epoch": 0.06,
"grad_norm": 0.20279422402381897,
"learning_rate": 9.98037760802793e-06,
"loss": 0.5339,
"step": 267
},
{
"epoch": 0.06,
"grad_norm": 0.16691498458385468,
"learning_rate": 9.980067583675177e-06,
"loss": 0.5257,
"step": 268
},
{
"epoch": 0.06,
"grad_norm": 0.19010309875011444,
"learning_rate": 9.97975513425408e-06,
"loss": 0.4422,
"step": 269
},
{
"epoch": 0.06,
"grad_norm": 0.17077746987342834,
"learning_rate": 9.979440259916782e-06,
"loss": 0.5756,
"step": 270
},
{
"epoch": 0.06,
"grad_norm": 0.15563777089118958,
"learning_rate": 9.979122960816617e-06,
"loss": 0.5803,
"step": 271
},
{
"epoch": 0.06,
"grad_norm": 0.1896345168352127,
"learning_rate": 9.978803237108095e-06,
"loss": 0.5307,
"step": 272
},
{
"epoch": 0.06,
"grad_norm": 0.20084036886692047,
"learning_rate": 9.978481088946905e-06,
"loss": 0.4988,
"step": 273
},
{
"epoch": 0.06,
"grad_norm": 0.18005971610546112,
"learning_rate": 9.97815651648992e-06,
"loss": 0.494,
"step": 274
},
{
"epoch": 0.06,
"grad_norm": 0.14255790412425995,
"learning_rate": 9.977829519895193e-06,
"loss": 0.5534,
"step": 275
},
{
"epoch": 0.06,
"grad_norm": 0.1580318808555603,
"learning_rate": 9.977500099321956e-06,
"loss": 0.5083,
"step": 276
},
{
"epoch": 0.06,
"grad_norm": 0.20587489008903503,
"learning_rate": 9.977168254930621e-06,
"loss": 0.5438,
"step": 277
},
{
"epoch": 0.06,
"grad_norm": 0.18426474928855896,
"learning_rate": 9.97683398688278e-06,
"loss": 0.5399,
"step": 278
},
{
"epoch": 0.06,
"grad_norm": 0.17722034454345703,
"learning_rate": 9.976497295341212e-06,
"loss": 0.4957,
"step": 279
},
{
"epoch": 0.06,
"grad_norm": 0.216731995344162,
"learning_rate": 9.976158180469866e-06,
"loss": 0.5127,
"step": 280
},
{
"epoch": 0.06,
"grad_norm": 0.20815429091453552,
"learning_rate": 9.975816642433876e-06,
"loss": 0.5859,
"step": 281
},
{
"epoch": 0.06,
"grad_norm": 0.15470731258392334,
"learning_rate": 9.975472681399556e-06,
"loss": 0.5417,
"step": 282
},
{
"epoch": 0.06,
"grad_norm": 0.17505955696105957,
"learning_rate": 9.975126297534399e-06,
"loss": 0.5197,
"step": 283
},
{
"epoch": 0.06,
"grad_norm": 0.15607048571109772,
"learning_rate": 9.97477749100708e-06,
"loss": 0.5202,
"step": 284
},
{
"epoch": 0.06,
"grad_norm": 0.25984108448028564,
"learning_rate": 9.97442626198745e-06,
"loss": 0.5113,
"step": 285
},
{
"epoch": 0.06,
"grad_norm": 0.17469698190689087,
"learning_rate": 9.974072610646543e-06,
"loss": 0.5274,
"step": 286
},
{
"epoch": 0.06,
"grad_norm": 0.1947067826986313,
"learning_rate": 9.973716537156573e-06,
"loss": 0.5743,
"step": 287
},
{
"epoch": 0.06,
"grad_norm": 0.16918258368968964,
"learning_rate": 9.973358041690926e-06,
"loss": 0.5623,
"step": 288
},
{
"epoch": 0.06,
"grad_norm": 0.1726803183555603,
"learning_rate": 9.972997124424179e-06,
"loss": 0.5577,
"step": 289
},
{
"epoch": 0.06,
"grad_norm": 0.22636979818344116,
"learning_rate": 9.972633785532082e-06,
"loss": 0.4822,
"step": 290
},
{
"epoch": 0.06,
"grad_norm": 0.1924733966588974,
"learning_rate": 9.972268025191561e-06,
"loss": 0.5294,
"step": 291
},
{
"epoch": 0.06,
"grad_norm": 0.16325077414512634,
"learning_rate": 9.971899843580728e-06,
"loss": 0.5588,
"step": 292
},
{
"epoch": 0.06,
"grad_norm": 0.17010986804962158,
"learning_rate": 9.971529240878869e-06,
"loss": 0.5254,
"step": 293
},
{
"epoch": 0.06,
"grad_norm": 0.22150567173957825,
"learning_rate": 9.971156217266451e-06,
"loss": 0.545,
"step": 294
},
{
"epoch": 0.06,
"grad_norm": 0.24462495744228363,
"learning_rate": 9.97078077292512e-06,
"loss": 0.5738,
"step": 295
},
{
"epoch": 0.06,
"grad_norm": 0.21568679809570312,
"learning_rate": 9.970402908037703e-06,
"loss": 0.5129,
"step": 296
},
{
"epoch": 0.06,
"grad_norm": 0.22609004378318787,
"learning_rate": 9.970022622788198e-06,
"loss": 0.535,
"step": 297
},
{
"epoch": 0.06,
"grad_norm": 0.20871202647686005,
"learning_rate": 9.96963991736179e-06,
"loss": 0.5405,
"step": 298
},
{
"epoch": 0.06,
"grad_norm": 0.20957247912883759,
"learning_rate": 9.969254791944839e-06,
"loss": 0.4701,
"step": 299
},
{
"epoch": 0.06,
"grad_norm": 0.26258644461631775,
"learning_rate": 9.968867246724882e-06,
"loss": 0.5575,
"step": 300
},
{
"epoch": 0.06,
"grad_norm": 0.17494796216487885,
"learning_rate": 9.96847728189064e-06,
"loss": 0.5414,
"step": 301
},
{
"epoch": 0.07,
"grad_norm": 0.21521888673305511,
"learning_rate": 9.968084897632004e-06,
"loss": 0.5152,
"step": 302
},
{
"epoch": 0.07,
"grad_norm": 0.16436263918876648,
"learning_rate": 9.967690094140052e-06,
"loss": 0.5144,
"step": 303
},
{
"epoch": 0.07,
"grad_norm": 0.17806245386600494,
"learning_rate": 9.96729287160703e-06,
"loss": 0.59,
"step": 304
},
{
"epoch": 0.07,
"grad_norm": 0.13292109966278076,
"learning_rate": 9.966893230226371e-06,
"loss": 0.5804,
"step": 305
},
{
"epoch": 0.07,
"grad_norm": 0.18737316131591797,
"learning_rate": 9.966491170192682e-06,
"loss": 0.5104,
"step": 306
},
{
"epoch": 0.07,
"grad_norm": 0.25303030014038086,
"learning_rate": 9.966086691701748e-06,
"loss": 0.5501,
"step": 307
},
{
"epoch": 0.07,
"grad_norm": 0.17302893102169037,
"learning_rate": 9.96567979495053e-06,
"loss": 0.5236,
"step": 308
},
{
"epoch": 0.07,
"grad_norm": 0.16693797707557678,
"learning_rate": 9.96527048013717e-06,
"loss": 0.5157,
"step": 309
},
{
"epoch": 0.07,
"grad_norm": 0.22584576904773712,
"learning_rate": 9.964858747460989e-06,
"loss": 0.5828,
"step": 310
},
{
"epoch": 0.07,
"grad_norm": 0.21684272587299347,
"learning_rate": 9.964444597122476e-06,
"loss": 0.5082,
"step": 311
},
{
"epoch": 0.07,
"grad_norm": 0.16628780961036682,
"learning_rate": 9.964028029323305e-06,
"loss": 0.5581,
"step": 312
},
{
"epoch": 0.07,
"grad_norm": 0.15919576585292816,
"learning_rate": 9.963609044266328e-06,
"loss": 0.5713,
"step": 313
},
{
"epoch": 0.07,
"grad_norm": 0.17761071026325226,
"learning_rate": 9.963187642155573e-06,
"loss": 0.5417,
"step": 314
},
{
"epoch": 0.07,
"grad_norm": 0.24367289245128632,
"learning_rate": 9.962763823196242e-06,
"loss": 0.5147,
"step": 315
},
{
"epoch": 0.07,
"grad_norm": 0.15818822383880615,
"learning_rate": 9.962337587594713e-06,
"loss": 0.4555,
"step": 316
},
{
"epoch": 0.07,
"grad_norm": 0.1815144419670105,
"learning_rate": 9.961908935558548e-06,
"loss": 0.5394,
"step": 317
},
{
"epoch": 0.07,
"grad_norm": 0.18169505894184113,
"learning_rate": 9.961477867296479e-06,
"loss": 0.5654,
"step": 318
},
{
"epoch": 0.07,
"grad_norm": 0.15763549506664276,
"learning_rate": 9.961044383018416e-06,
"loss": 0.5565,
"step": 319
},
{
"epoch": 0.07,
"grad_norm": 0.1651836484670639,
"learning_rate": 9.96060848293545e-06,
"loss": 0.5522,
"step": 320
},
{
"epoch": 0.07,
"grad_norm": 0.1605907380580902,
"learning_rate": 9.96017016725984e-06,
"loss": 0.521,
"step": 321
},
{
"epoch": 0.07,
"grad_norm": 0.1753988415002823,
"learning_rate": 9.959729436205027e-06,
"loss": 0.5217,
"step": 322
},
{
"epoch": 0.07,
"grad_norm": 0.15843777358531952,
"learning_rate": 9.95928628998563e-06,
"loss": 0.5384,
"step": 323
},
{
"epoch": 0.07,
"grad_norm": 0.15907292068004608,
"learning_rate": 9.95884072881744e-06,
"loss": 0.4972,
"step": 324
},
{
"epoch": 0.07,
"grad_norm": 0.14925910532474518,
"learning_rate": 9.958392752917425e-06,
"loss": 0.5313,
"step": 325
},
{
"epoch": 0.07,
"grad_norm": 0.1515308916568756,
"learning_rate": 9.957942362503728e-06,
"loss": 0.5329,
"step": 326
},
{
"epoch": 0.07,
"grad_norm": 0.1642216593027115,
"learning_rate": 9.957489557795667e-06,
"loss": 0.516,
"step": 327
},
{
"epoch": 0.07,
"grad_norm": 0.1622897833585739,
"learning_rate": 9.957034339013742e-06,
"loss": 0.5641,
"step": 328
},
{
"epoch": 0.07,
"grad_norm": 0.19205595552921295,
"learning_rate": 9.956576706379623e-06,
"loss": 0.5109,
"step": 329
},
{
"epoch": 0.07,
"grad_norm": 0.16615568101406097,
"learning_rate": 9.956116660116155e-06,
"loss": 0.5208,
"step": 330
},
{
"epoch": 0.07,
"grad_norm": 0.24837036430835724,
"learning_rate": 9.95565420044736e-06,
"loss": 0.5572,
"step": 331
},
{
"epoch": 0.07,
"grad_norm": 0.15492476522922516,
"learning_rate": 9.955189327598435e-06,
"loss": 0.5439,
"step": 332
},
{
"epoch": 0.07,
"grad_norm": 0.21482093632221222,
"learning_rate": 9.954722041795753e-06,
"loss": 0.5498,
"step": 333
},
{
"epoch": 0.07,
"grad_norm": 0.2388935685157776,
"learning_rate": 9.954252343266859e-06,
"loss": 0.4783,
"step": 334
},
{
"epoch": 0.07,
"grad_norm": 0.16139458119869232,
"learning_rate": 9.953780232240477e-06,
"loss": 0.5553,
"step": 335
},
{
"epoch": 0.07,
"grad_norm": 0.20065993070602417,
"learning_rate": 9.953305708946504e-06,
"loss": 0.5273,
"step": 336
},
{
"epoch": 0.07,
"grad_norm": 0.19159255921840668,
"learning_rate": 9.95282877361601e-06,
"loss": 0.5237,
"step": 337
},
{
"epoch": 0.07,
"grad_norm": 0.21565450727939606,
"learning_rate": 9.952349426481243e-06,
"loss": 0.5408,
"step": 338
},
{
"epoch": 0.07,
"grad_norm": 0.16351784765720367,
"learning_rate": 9.95186766777562e-06,
"loss": 0.516,
"step": 339
},
{
"epoch": 0.07,
"grad_norm": 0.18081237375736237,
"learning_rate": 9.95138349773374e-06,
"loss": 0.5319,
"step": 340
},
{
"epoch": 0.07,
"grad_norm": 0.17618371546268463,
"learning_rate": 9.950896916591368e-06,
"loss": 0.4894,
"step": 341
},
{
"epoch": 0.07,
"grad_norm": 0.21193227171897888,
"learning_rate": 9.95040792458545e-06,
"loss": 0.5017,
"step": 342
},
{
"epoch": 0.07,
"grad_norm": 0.15902818739414215,
"learning_rate": 9.949916521954104e-06,
"loss": 0.5468,
"step": 343
},
{
"epoch": 0.07,
"grad_norm": 0.15740692615509033,
"learning_rate": 9.949422708936616e-06,
"loss": 0.5108,
"step": 344
},
{
"epoch": 0.07,
"grad_norm": 0.2678494453430176,
"learning_rate": 9.948926485773455e-06,
"loss": 0.5588,
"step": 345
},
{
"epoch": 0.07,
"grad_norm": 0.11566779017448425,
"learning_rate": 9.948427852706257e-06,
"loss": 0.5603,
"step": 346
},
{
"epoch": 0.07,
"grad_norm": 0.19342099130153656,
"learning_rate": 9.947926809977835e-06,
"loss": 0.5577,
"step": 347
},
{
"epoch": 0.07,
"grad_norm": 0.20827247202396393,
"learning_rate": 9.947423357832176e-06,
"loss": 0.5401,
"step": 348
},
{
"epoch": 0.08,
"grad_norm": 0.21266911923885345,
"learning_rate": 9.946917496514435e-06,
"loss": 0.5555,
"step": 349
},
{
"epoch": 0.08,
"grad_norm": 0.2901332378387451,
"learning_rate": 9.946409226270945e-06,
"loss": 0.5615,
"step": 350
},
{
"epoch": 0.08,
"grad_norm": 0.18509002029895782,
"learning_rate": 9.94589854734921e-06,
"loss": 0.5187,
"step": 351
},
{
"epoch": 0.08,
"grad_norm": 0.1265445202589035,
"learning_rate": 9.945385459997909e-06,
"loss": 0.5356,
"step": 352
},
{
"epoch": 0.08,
"grad_norm": 0.1595221310853958,
"learning_rate": 9.944869964466892e-06,
"loss": 0.5861,
"step": 353
},
{
"epoch": 0.08,
"grad_norm": 0.24293914437294006,
"learning_rate": 9.944352061007182e-06,
"loss": 0.5336,
"step": 354
},
{
"epoch": 0.08,
"grad_norm": 0.17409418523311615,
"learning_rate": 9.943831749870973e-06,
"loss": 0.5194,
"step": 355
},
{
"epoch": 0.08,
"grad_norm": 0.2043304294347763,
"learning_rate": 9.943309031311637e-06,
"loss": 0.5477,
"step": 356
},
{
"epoch": 0.08,
"grad_norm": 0.16727691888809204,
"learning_rate": 9.942783905583711e-06,
"loss": 0.5276,
"step": 357
},
{
"epoch": 0.08,
"grad_norm": 0.19069473445415497,
"learning_rate": 9.942256372942909e-06,
"loss": 0.5096,
"step": 358
},
{
"epoch": 0.08,
"grad_norm": 0.1717585176229477,
"learning_rate": 9.941726433646115e-06,
"loss": 0.5186,
"step": 359
},
{
"epoch": 0.08,
"grad_norm": 0.15714509785175323,
"learning_rate": 9.941194087951384e-06,
"loss": 0.5358,
"step": 360
},
{
"epoch": 0.08,
"grad_norm": 0.1681104153394699,
"learning_rate": 9.940659336117948e-06,
"loss": 0.5832,
"step": 361
},
{
"epoch": 0.08,
"grad_norm": 0.21985803544521332,
"learning_rate": 9.940122178406205e-06,
"loss": 0.5477,
"step": 362
},
{
"epoch": 0.08,
"grad_norm": 0.19286422431468964,
"learning_rate": 9.939582615077724e-06,
"loss": 0.5428,
"step": 363
},
{
"epoch": 0.08,
"grad_norm": 0.198882594704628,
"learning_rate": 9.939040646395252e-06,
"loss": 0.5572,
"step": 364
},
{
"epoch": 0.08,
"grad_norm": 0.18456579744815826,
"learning_rate": 9.938496272622703e-06,
"loss": 0.5168,
"step": 365
},
{
"epoch": 0.08,
"grad_norm": 0.15817482769489288,
"learning_rate": 9.93794949402516e-06,
"loss": 0.4895,
"step": 366
},
{
"epoch": 0.08,
"grad_norm": 0.17915575206279755,
"learning_rate": 9.937400310868883e-06,
"loss": 0.6069,
"step": 367
},
{
"epoch": 0.08,
"grad_norm": 0.24024631083011627,
"learning_rate": 9.936848723421295e-06,
"loss": 0.5585,
"step": 368
},
{
"epoch": 0.08,
"grad_norm": 0.20291557908058167,
"learning_rate": 9.936294731950999e-06,
"loss": 0.5197,
"step": 369
},
{
"epoch": 0.08,
"grad_norm": 0.17010553181171417,
"learning_rate": 9.93573833672776e-06,
"loss": 0.4911,
"step": 370
},
{
"epoch": 0.08,
"grad_norm": 0.21095992624759674,
"learning_rate": 9.935179538022518e-06,
"loss": 0.5152,
"step": 371
},
{
"epoch": 0.08,
"grad_norm": 0.14240865409374237,
"learning_rate": 9.934618336107385e-06,
"loss": 0.5663,
"step": 372
},
{
"epoch": 0.08,
"grad_norm": 0.13972750306129456,
"learning_rate": 9.934054731255638e-06,
"loss": 0.5214,
"step": 373
},
{
"epoch": 0.08,
"grad_norm": 0.2143140733242035,
"learning_rate": 9.933488723741731e-06,
"loss": 0.5213,
"step": 374
},
{
"epoch": 0.08,
"grad_norm": 0.16753612458705902,
"learning_rate": 9.932920313841281e-06,
"loss": 0.5654,
"step": 375
},
{
"epoch": 0.08,
"grad_norm": 0.17069406807422638,
"learning_rate": 9.932349501831077e-06,
"loss": 0.5813,
"step": 376
},
{
"epoch": 0.08,
"grad_norm": 0.16417956352233887,
"learning_rate": 9.931776287989084e-06,
"loss": 0.5091,
"step": 377
},
{
"epoch": 0.08,
"grad_norm": 0.14209146797657013,
"learning_rate": 9.931200672594425e-06,
"loss": 0.5498,
"step": 378
},
{
"epoch": 0.08,
"grad_norm": 0.15816187858581543,
"learning_rate": 9.930622655927403e-06,
"loss": 0.5175,
"step": 379
},
{
"epoch": 0.08,
"grad_norm": 0.1323387622833252,
"learning_rate": 9.930042238269485e-06,
"loss": 0.5217,
"step": 380
},
{
"epoch": 0.08,
"grad_norm": 0.14928382635116577,
"learning_rate": 9.929459419903307e-06,
"loss": 0.5655,
"step": 381
},
{
"epoch": 0.08,
"grad_norm": 0.1628965139389038,
"learning_rate": 9.928874201112677e-06,
"loss": 0.5221,
"step": 382
},
{
"epoch": 0.08,
"grad_norm": 0.16259951889514923,
"learning_rate": 9.92828658218257e-06,
"loss": 0.496,
"step": 383
},
{
"epoch": 0.08,
"grad_norm": 0.15290167927742004,
"learning_rate": 9.927696563399127e-06,
"loss": 0.5241,
"step": 384
},
{
"epoch": 0.08,
"grad_norm": 0.1801231950521469,
"learning_rate": 9.927104145049664e-06,
"loss": 0.5671,
"step": 385
},
{
"epoch": 0.08,
"grad_norm": 0.1866559088230133,
"learning_rate": 9.926509327422661e-06,
"loss": 0.5476,
"step": 386
},
{
"epoch": 0.08,
"grad_norm": 0.1865171194076538,
"learning_rate": 9.925912110807766e-06,
"loss": 0.5352,
"step": 387
},
{
"epoch": 0.08,
"grad_norm": 0.1893538534641266,
"learning_rate": 9.9253124954958e-06,
"loss": 0.5537,
"step": 388
},
{
"epoch": 0.08,
"grad_norm": 0.18070872128009796,
"learning_rate": 9.924710481778746e-06,
"loss": 0.5292,
"step": 389
},
{
"epoch": 0.08,
"grad_norm": 0.17466960847377777,
"learning_rate": 9.924106069949756e-06,
"loss": 0.5212,
"step": 390
},
{
"epoch": 0.08,
"grad_norm": 0.18615244328975677,
"learning_rate": 9.923499260303155e-06,
"loss": 0.5116,
"step": 391
},
{
"epoch": 0.08,
"grad_norm": 0.17259790003299713,
"learning_rate": 9.922890053134428e-06,
"loss": 0.5159,
"step": 392
},
{
"epoch": 0.08,
"grad_norm": 0.16246852278709412,
"learning_rate": 9.922278448740235e-06,
"loss": 0.5268,
"step": 393
},
{
"epoch": 0.08,
"grad_norm": 0.15586566925048828,
"learning_rate": 9.9216644474184e-06,
"loss": 0.4965,
"step": 394
},
{
"epoch": 0.09,
"grad_norm": 0.1761687695980072,
"learning_rate": 9.92104804946791e-06,
"loss": 0.5504,
"step": 395
},
{
"epoch": 0.09,
"grad_norm": 0.19697882235050201,
"learning_rate": 9.920429255188926e-06,
"loss": 0.5055,
"step": 396
},
{
"epoch": 0.09,
"grad_norm": 0.1910158395767212,
"learning_rate": 9.919808064882773e-06,
"loss": 0.4947,
"step": 397
},
{
"epoch": 0.09,
"grad_norm": 0.18492764234542847,
"learning_rate": 9.91918447885194e-06,
"loss": 0.569,
"step": 398
},
{
"epoch": 0.09,
"grad_norm": 0.17928937077522278,
"learning_rate": 9.918558497400088e-06,
"loss": 0.4933,
"step": 399
},
{
"epoch": 0.09,
"grad_norm": 0.18491177260875702,
"learning_rate": 9.91793012083204e-06,
"loss": 0.5489,
"step": 400
},
{
"epoch": 0.09,
"grad_norm": 0.1796533763408661,
"learning_rate": 9.917299349453791e-06,
"loss": 0.5575,
"step": 401
},
{
"epoch": 0.09,
"grad_norm": 0.14460118114948273,
"learning_rate": 9.916666183572492e-06,
"loss": 0.4632,
"step": 402
},
{
"epoch": 0.09,
"grad_norm": 0.13730689883232117,
"learning_rate": 9.916030623496472e-06,
"loss": 0.5634,
"step": 403
},
{
"epoch": 0.09,
"grad_norm": 0.18971490859985352,
"learning_rate": 9.915392669535214e-06,
"loss": 0.5193,
"step": 404
},
{
"epoch": 0.09,
"grad_norm": 0.12481328845024109,
"learning_rate": 9.914752321999379e-06,
"loss": 0.5389,
"step": 405
},
{
"epoch": 0.09,
"grad_norm": 0.17612747848033905,
"learning_rate": 9.914109581200785e-06,
"loss": 0.5129,
"step": 406
},
{
"epoch": 0.09,
"grad_norm": 0.1852181851863861,
"learning_rate": 9.913464447452414e-06,
"loss": 0.5124,
"step": 407
},
{
"epoch": 0.09,
"grad_norm": 0.23606260120868683,
"learning_rate": 9.912816921068424e-06,
"loss": 0.4736,
"step": 408
},
{
"epoch": 0.09,
"grad_norm": 0.23079735040664673,
"learning_rate": 9.912167002364126e-06,
"loss": 0.5612,
"step": 409
},
{
"epoch": 0.09,
"grad_norm": 0.22326047718524933,
"learning_rate": 9.911514691656003e-06,
"loss": 0.5367,
"step": 410
},
{
"epoch": 0.09,
"grad_norm": 0.1975882351398468,
"learning_rate": 9.910859989261702e-06,
"loss": 0.5575,
"step": 411
},
{
"epoch": 0.09,
"grad_norm": 0.16411826014518738,
"learning_rate": 9.910202895500031e-06,
"loss": 0.5506,
"step": 412
},
{
"epoch": 0.09,
"grad_norm": 0.1982284039258957,
"learning_rate": 9.909543410690967e-06,
"loss": 0.5443,
"step": 413
},
{
"epoch": 0.09,
"grad_norm": 0.1679336577653885,
"learning_rate": 9.908881535155647e-06,
"loss": 0.4876,
"step": 414
},
{
"epoch": 0.09,
"grad_norm": 0.17549291253089905,
"learning_rate": 9.908217269216377e-06,
"loss": 0.558,
"step": 415
},
{
"epoch": 0.09,
"grad_norm": 0.13716278970241547,
"learning_rate": 9.907550613196624e-06,
"loss": 0.5527,
"step": 416
},
{
"epoch": 0.09,
"grad_norm": 0.171469584107399,
"learning_rate": 9.90688156742102e-06,
"loss": 0.5066,
"step": 417
},
{
"epoch": 0.09,
"grad_norm": 0.19487224519252777,
"learning_rate": 9.906210132215357e-06,
"loss": 0.5211,
"step": 418
},
{
"epoch": 0.09,
"grad_norm": 0.16895724833011627,
"learning_rate": 9.905536307906599e-06,
"loss": 0.4936,
"step": 419
},
{
"epoch": 0.09,
"grad_norm": 0.2055499255657196,
"learning_rate": 9.904860094822861e-06,
"loss": 0.4719,
"step": 420
},
{
"epoch": 0.09,
"grad_norm": 0.30334606766700745,
"learning_rate": 9.904181493293434e-06,
"loss": 0.5743,
"step": 421
},
{
"epoch": 0.09,
"grad_norm": 0.15841247141361237,
"learning_rate": 9.903500503648766e-06,
"loss": 0.5722,
"step": 422
},
{
"epoch": 0.09,
"grad_norm": 0.15331457555294037,
"learning_rate": 9.902817126220465e-06,
"loss": 0.4636,
"step": 423
},
{
"epoch": 0.09,
"grad_norm": 0.22973452508449554,
"learning_rate": 9.902131361341307e-06,
"loss": 0.5427,
"step": 424
},
{
"epoch": 0.09,
"grad_norm": 0.21478115022182465,
"learning_rate": 9.901443209345229e-06,
"loss": 0.5324,
"step": 425
},
{
"epoch": 0.09,
"grad_norm": 0.2344510704278946,
"learning_rate": 9.900752670567331e-06,
"loss": 0.5439,
"step": 426
},
{
"epoch": 0.09,
"grad_norm": 0.17472712695598602,
"learning_rate": 9.90005974534387e-06,
"loss": 0.4745,
"step": 427
},
{
"epoch": 0.09,
"grad_norm": 0.1476239264011383,
"learning_rate": 9.899364434012273e-06,
"loss": 0.4726,
"step": 428
},
{
"epoch": 0.09,
"grad_norm": 0.174478217959404,
"learning_rate": 9.898666736911125e-06,
"loss": 0.5485,
"step": 429
},
{
"epoch": 0.09,
"grad_norm": 0.20660632848739624,
"learning_rate": 9.897966654380172e-06,
"loss": 0.5274,
"step": 430
},
{
"epoch": 0.09,
"grad_norm": 0.1528811752796173,
"learning_rate": 9.89726418676032e-06,
"loss": 0.5305,
"step": 431
},
{
"epoch": 0.09,
"grad_norm": 0.23785395920276642,
"learning_rate": 9.896559334393644e-06,
"loss": 0.5553,
"step": 432
},
{
"epoch": 0.09,
"grad_norm": 0.19750644266605377,
"learning_rate": 9.895852097623374e-06,
"loss": 0.5441,
"step": 433
},
{
"epoch": 0.09,
"grad_norm": 0.16664327681064606,
"learning_rate": 9.895142476793902e-06,
"loss": 0.4756,
"step": 434
},
{
"epoch": 0.09,
"grad_norm": 0.18724434077739716,
"learning_rate": 9.89443047225078e-06,
"loss": 0.5046,
"step": 435
},
{
"epoch": 0.09,
"grad_norm": 0.20234829187393188,
"learning_rate": 9.893716084340723e-06,
"loss": 0.5276,
"step": 436
},
{
"epoch": 0.09,
"grad_norm": 0.17969612777233124,
"learning_rate": 9.892999313411607e-06,
"loss": 0.5428,
"step": 437
},
{
"epoch": 0.09,
"grad_norm": 0.19304272532463074,
"learning_rate": 9.892280159812465e-06,
"loss": 0.5281,
"step": 438
},
{
"epoch": 0.09,
"grad_norm": 0.15909235179424286,
"learning_rate": 9.891558623893492e-06,
"loss": 0.5393,
"step": 439
},
{
"epoch": 0.09,
"grad_norm": 0.24154618382453918,
"learning_rate": 9.890834706006048e-06,
"loss": 0.5446,
"step": 440
},
{
"epoch": 0.09,
"grad_norm": 0.1484946757555008,
"learning_rate": 9.890108406502642e-06,
"loss": 0.5034,
"step": 441
},
{
"epoch": 0.1,
"grad_norm": 0.20041412115097046,
"learning_rate": 9.889379725736953e-06,
"loss": 0.5569,
"step": 442
},
{
"epoch": 0.1,
"grad_norm": 0.1696542501449585,
"learning_rate": 9.888648664063815e-06,
"loss": 0.5521,
"step": 443
},
{
"epoch": 0.1,
"grad_norm": 0.2253563106060028,
"learning_rate": 9.887915221839223e-06,
"loss": 0.5881,
"step": 444
},
{
"epoch": 0.1,
"grad_norm": 0.16398414969444275,
"learning_rate": 9.88717939942033e-06,
"loss": 0.5276,
"step": 445
},
{
"epoch": 0.1,
"grad_norm": 0.19543707370758057,
"learning_rate": 9.886441197165446e-06,
"loss": 0.5172,
"step": 446
},
{
"epoch": 0.1,
"grad_norm": 0.19510690867900848,
"learning_rate": 9.885700615434044e-06,
"loss": 0.5489,
"step": 447
},
{
"epoch": 0.1,
"grad_norm": 0.20647871494293213,
"learning_rate": 9.884957654586753e-06,
"loss": 0.5691,
"step": 448
},
{
"epoch": 0.1,
"grad_norm": 0.1428651362657547,
"learning_rate": 9.884212314985363e-06,
"loss": 0.5415,
"step": 449
},
{
"epoch": 0.1,
"grad_norm": 0.20169362425804138,
"learning_rate": 9.88346459699282e-06,
"loss": 0.5035,
"step": 450
},
{
"epoch": 0.1,
"grad_norm": 0.1399114578962326,
"learning_rate": 9.88271450097323e-06,
"loss": 0.4997,
"step": 451
},
{
"epoch": 0.1,
"grad_norm": 0.13809053599834442,
"learning_rate": 9.881962027291855e-06,
"loss": 0.5106,
"step": 452
},
{
"epoch": 0.1,
"grad_norm": 0.15126360952854156,
"learning_rate": 9.881207176315112e-06,
"loss": 0.4804,
"step": 453
},
{
"epoch": 0.1,
"grad_norm": 0.17541149258613586,
"learning_rate": 9.880449948410587e-06,
"loss": 0.5529,
"step": 454
},
{
"epoch": 0.1,
"grad_norm": 0.21182189881801605,
"learning_rate": 9.879690343947009e-06,
"loss": 0.5671,
"step": 455
},
{
"epoch": 0.1,
"grad_norm": 0.16285210847854614,
"learning_rate": 9.878928363294275e-06,
"loss": 0.5288,
"step": 456
},
{
"epoch": 0.1,
"grad_norm": 0.17910024523735046,
"learning_rate": 9.878164006823434e-06,
"loss": 0.4876,
"step": 457
},
{
"epoch": 0.1,
"grad_norm": 0.18861602246761322,
"learning_rate": 9.877397274906694e-06,
"loss": 0.5403,
"step": 458
},
{
"epoch": 0.1,
"grad_norm": 0.18446660041809082,
"learning_rate": 9.876628167917417e-06,
"loss": 0.5558,
"step": 459
},
{
"epoch": 0.1,
"grad_norm": 0.23668085038661957,
"learning_rate": 9.875856686230125e-06,
"loss": 0.5781,
"step": 460
},
{
"epoch": 0.1,
"grad_norm": 0.1459677368402481,
"learning_rate": 9.875082830220496e-06,
"loss": 0.5102,
"step": 461
},
{
"epoch": 0.1,
"grad_norm": 0.19470389187335968,
"learning_rate": 9.87430660026536e-06,
"loss": 0.4579,
"step": 462
},
{
"epoch": 0.1,
"grad_norm": 0.13941837847232819,
"learning_rate": 9.873527996742707e-06,
"loss": 0.5971,
"step": 463
},
{
"epoch": 0.1,
"grad_norm": 0.1971631497144699,
"learning_rate": 9.872747020031682e-06,
"loss": 0.5637,
"step": 464
},
{
"epoch": 0.1,
"grad_norm": 0.1430051177740097,
"learning_rate": 9.871963670512586e-06,
"loss": 0.4621,
"step": 465
},
{
"epoch": 0.1,
"grad_norm": 0.1990920901298523,
"learning_rate": 9.871177948566875e-06,
"loss": 0.508,
"step": 466
},
{
"epoch": 0.1,
"grad_norm": 0.1744355857372284,
"learning_rate": 9.870389854577157e-06,
"loss": 0.5115,
"step": 467
},
{
"epoch": 0.1,
"grad_norm": 0.1770336627960205,
"learning_rate": 9.869599388927204e-06,
"loss": 0.5535,
"step": 468
},
{
"epoch": 0.1,
"grad_norm": 0.1405770629644394,
"learning_rate": 9.868806552001933e-06,
"loss": 0.5188,
"step": 469
},
{
"epoch": 0.1,
"grad_norm": 0.164622500538826,
"learning_rate": 9.868011344187421e-06,
"loss": 0.543,
"step": 470
},
{
"epoch": 0.1,
"grad_norm": 0.34272515773773193,
"learning_rate": 9.867213765870897e-06,
"loss": 0.444,
"step": 471
},
{
"epoch": 0.1,
"grad_norm": 0.29466864466667175,
"learning_rate": 9.866413817440748e-06,
"loss": 0.5177,
"step": 472
},
{
"epoch": 0.1,
"grad_norm": 0.1591256856918335,
"learning_rate": 9.865611499286511e-06,
"loss": 0.543,
"step": 473
},
{
"epoch": 0.1,
"grad_norm": 0.17290642857551575,
"learning_rate": 9.864806811798881e-06,
"loss": 0.5571,
"step": 474
},
{
"epoch": 0.1,
"grad_norm": 0.14120014011859894,
"learning_rate": 9.863999755369703e-06,
"loss": 0.5366,
"step": 475
},
{
"epoch": 0.1,
"grad_norm": 0.1678173840045929,
"learning_rate": 9.863190330391974e-06,
"loss": 0.5301,
"step": 476
},
{
"epoch": 0.1,
"grad_norm": 0.17042382061481476,
"learning_rate": 9.862378537259853e-06,
"loss": 0.5669,
"step": 477
},
{
"epoch": 0.1,
"grad_norm": 0.1473226100206375,
"learning_rate": 9.861564376368645e-06,
"loss": 0.5113,
"step": 478
},
{
"epoch": 0.1,
"grad_norm": 0.1683841496706009,
"learning_rate": 9.860747848114805e-06,
"loss": 0.542,
"step": 479
},
{
"epoch": 0.1,
"grad_norm": 0.17106866836547852,
"learning_rate": 9.859928952895952e-06,
"loss": 0.5023,
"step": 480
},
{
"epoch": 0.1,
"grad_norm": 0.16280145943164825,
"learning_rate": 9.859107691110847e-06,
"loss": 0.5605,
"step": 481
},
{
"epoch": 0.1,
"grad_norm": 0.14175820350646973,
"learning_rate": 9.858284063159411e-06,
"loss": 0.5716,
"step": 482
},
{
"epoch": 0.1,
"grad_norm": 0.21412678062915802,
"learning_rate": 9.857458069442709e-06,
"loss": 0.515,
"step": 483
},
{
"epoch": 0.1,
"grad_norm": 0.19349491596221924,
"learning_rate": 9.856629710362966e-06,
"loss": 0.5198,
"step": 484
},
{
"epoch": 0.1,
"grad_norm": 0.1516617089509964,
"learning_rate": 9.855798986323556e-06,
"loss": 0.4953,
"step": 485
},
{
"epoch": 0.1,
"grad_norm": 0.2074221968650818,
"learning_rate": 9.854965897729001e-06,
"loss": 0.5118,
"step": 486
},
{
"epoch": 0.1,
"grad_norm": 0.14066927134990692,
"learning_rate": 9.85413044498498e-06,
"loss": 0.5228,
"step": 487
},
{
"epoch": 0.11,
"grad_norm": 0.2228998988866806,
"learning_rate": 9.853292628498319e-06,
"loss": 0.6139,
"step": 488
},
{
"epoch": 0.11,
"grad_norm": 0.31960368156433105,
"learning_rate": 9.852452448676999e-06,
"loss": 0.5553,
"step": 489
},
{
"epoch": 0.11,
"grad_norm": 0.17156168818473816,
"learning_rate": 9.851609905930149e-06,
"loss": 0.5373,
"step": 490
},
{
"epoch": 0.11,
"grad_norm": 0.12861701846122742,
"learning_rate": 9.850765000668048e-06,
"loss": 0.5126,
"step": 491
},
{
"epoch": 0.11,
"grad_norm": 0.17264096438884735,
"learning_rate": 9.849917733302128e-06,
"loss": 0.5141,
"step": 492
},
{
"epoch": 0.11,
"grad_norm": 0.16210493445396423,
"learning_rate": 9.84906810424497e-06,
"loss": 0.524,
"step": 493
},
{
"epoch": 0.11,
"grad_norm": 0.12152129411697388,
"learning_rate": 9.848216113910306e-06,
"loss": 0.5405,
"step": 494
},
{
"epoch": 0.11,
"grad_norm": 0.17325671017169952,
"learning_rate": 9.847361762713013e-06,
"loss": 0.5062,
"step": 495
},
{
"epoch": 0.11,
"grad_norm": 0.14273308217525482,
"learning_rate": 9.846505051069126e-06,
"loss": 0.5302,
"step": 496
},
{
"epoch": 0.11,
"grad_norm": 0.2055240273475647,
"learning_rate": 9.845645979395824e-06,
"loss": 0.5018,
"step": 497
},
{
"epoch": 0.11,
"grad_norm": 0.1372431516647339,
"learning_rate": 9.844784548111433e-06,
"loss": 0.5665,
"step": 498
},
{
"epoch": 0.11,
"grad_norm": 0.1912691444158554,
"learning_rate": 9.843920757635435e-06,
"loss": 0.5267,
"step": 499
},
{
"epoch": 0.11,
"grad_norm": 0.14471903443336487,
"learning_rate": 9.843054608388455e-06,
"loss": 0.5087,
"step": 500
},
{
"epoch": 0.11,
"grad_norm": 0.17829883098602295,
"learning_rate": 9.84218610079227e-06,
"loss": 0.5029,
"step": 501
},
{
"epoch": 0.11,
"grad_norm": 0.16071033477783203,
"learning_rate": 9.8413152352698e-06,
"loss": 0.5259,
"step": 502
},
{
"epoch": 0.11,
"grad_norm": 0.21240952610969543,
"learning_rate": 9.840442012245125e-06,
"loss": 0.5266,
"step": 503
},
{
"epoch": 0.11,
"grad_norm": 0.1682009994983673,
"learning_rate": 9.839566432143459e-06,
"loss": 0.5132,
"step": 504
},
{
"epoch": 0.11,
"grad_norm": 0.14732059836387634,
"learning_rate": 9.838688495391171e-06,
"loss": 0.5745,
"step": 505
},
{
"epoch": 0.11,
"grad_norm": 0.15087178349494934,
"learning_rate": 9.837808202415778e-06,
"loss": 0.5017,
"step": 506
},
{
"epoch": 0.11,
"grad_norm": 0.16476622223854065,
"learning_rate": 9.836925553645941e-06,
"loss": 0.5044,
"step": 507
},
{
"epoch": 0.11,
"grad_norm": 0.23170307278633118,
"learning_rate": 9.836040549511472e-06,
"loss": 0.574,
"step": 508
},
{
"epoch": 0.11,
"grad_norm": 0.18723872303962708,
"learning_rate": 9.835153190443327e-06,
"loss": 0.4981,
"step": 509
},
{
"epoch": 0.11,
"grad_norm": 0.18692149221897125,
"learning_rate": 9.83426347687361e-06,
"loss": 0.554,
"step": 510
},
{
"epoch": 0.11,
"grad_norm": 0.16876354813575745,
"learning_rate": 9.833371409235575e-06,
"loss": 0.5535,
"step": 511
},
{
"epoch": 0.11,
"grad_norm": 0.1443847119808197,
"learning_rate": 9.832476987963613e-06,
"loss": 0.4957,
"step": 512
},
{
"epoch": 0.11,
"grad_norm": 0.17338885366916656,
"learning_rate": 9.83158021349327e-06,
"loss": 0.5019,
"step": 513
},
{
"epoch": 0.11,
"grad_norm": 0.19001881778240204,
"learning_rate": 9.830681086261234e-06,
"loss": 0.5165,
"step": 514
},
{
"epoch": 0.11,
"grad_norm": 0.24521715939044952,
"learning_rate": 9.829779606705337e-06,
"loss": 0.579,
"step": 515
},
{
"epoch": 0.11,
"grad_norm": 0.16400645673274994,
"learning_rate": 9.828875775264564e-06,
"loss": 0.5429,
"step": 516
},
{
"epoch": 0.11,
"grad_norm": 0.2782368063926697,
"learning_rate": 9.827969592379036e-06,
"loss": 0.4832,
"step": 517
},
{
"epoch": 0.11,
"grad_norm": 0.15196365118026733,
"learning_rate": 9.827061058490027e-06,
"loss": 0.4643,
"step": 518
},
{
"epoch": 0.11,
"grad_norm": 0.17149809002876282,
"learning_rate": 9.826150174039949e-06,
"loss": 0.5388,
"step": 519
},
{
"epoch": 0.11,
"grad_norm": 0.152251735329628,
"learning_rate": 9.82523693947236e-06,
"loss": 0.5147,
"step": 520
},
{
"epoch": 0.11,
"grad_norm": 0.1551138162612915,
"learning_rate": 9.824321355231968e-06,
"loss": 0.4826,
"step": 521
},
{
"epoch": 0.11,
"grad_norm": 0.15354926884174347,
"learning_rate": 9.82340342176462e-06,
"loss": 0.482,
"step": 522
},
{
"epoch": 0.11,
"grad_norm": 0.15569601953029633,
"learning_rate": 9.822483139517307e-06,
"loss": 0.4989,
"step": 523
},
{
"epoch": 0.11,
"grad_norm": 0.17023304104804993,
"learning_rate": 9.821560508938167e-06,
"loss": 0.4974,
"step": 524
},
{
"epoch": 0.11,
"grad_norm": 0.17514115571975708,
"learning_rate": 9.820635530476478e-06,
"loss": 0.4923,
"step": 525
},
{
"epoch": 0.11,
"grad_norm": 0.2187749445438385,
"learning_rate": 9.819708204582664e-06,
"loss": 0.5623,
"step": 526
},
{
"epoch": 0.11,
"grad_norm": 0.1382010579109192,
"learning_rate": 9.818778531708288e-06,
"loss": 0.4999,
"step": 527
},
{
"epoch": 0.11,
"grad_norm": 0.15643168985843658,
"learning_rate": 9.817846512306062e-06,
"loss": 0.4885,
"step": 528
},
{
"epoch": 0.11,
"grad_norm": 0.16030187904834747,
"learning_rate": 9.816912146829836e-06,
"loss": 0.5217,
"step": 529
},
{
"epoch": 0.11,
"grad_norm": 0.2057688981294632,
"learning_rate": 9.815975435734604e-06,
"loss": 0.5254,
"step": 530
},
{
"epoch": 0.11,
"grad_norm": 0.20325696468353271,
"learning_rate": 9.815036379476502e-06,
"loss": 0.5831,
"step": 531
},
{
"epoch": 0.11,
"grad_norm": 0.19320160150527954,
"learning_rate": 9.814094978512808e-06,
"loss": 0.4558,
"step": 532
},
{
"epoch": 0.11,
"grad_norm": 0.20372559130191803,
"learning_rate": 9.813151233301943e-06,
"loss": 0.5431,
"step": 533
},
{
"epoch": 0.12,
"grad_norm": 0.18915057182312012,
"learning_rate": 9.812205144303466e-06,
"loss": 0.5243,
"step": 534
},
{
"epoch": 0.12,
"grad_norm": 0.25529226660728455,
"learning_rate": 9.811256711978082e-06,
"loss": 0.5403,
"step": 535
},
{
"epoch": 0.12,
"grad_norm": 0.2046184092760086,
"learning_rate": 9.810305936787633e-06,
"loss": 0.5275,
"step": 536
},
{
"epoch": 0.12,
"grad_norm": 0.15673565864562988,
"learning_rate": 9.809352819195106e-06,
"loss": 0.57,
"step": 537
},
{
"epoch": 0.12,
"grad_norm": 0.13071295619010925,
"learning_rate": 9.808397359664624e-06,
"loss": 0.5232,
"step": 538
},
{
"epoch": 0.12,
"grad_norm": 0.17526838183403015,
"learning_rate": 9.807439558661453e-06,
"loss": 0.498,
"step": 539
},
{
"epoch": 0.12,
"grad_norm": 0.1860094964504242,
"learning_rate": 9.806479416652e-06,
"loss": 0.5327,
"step": 540
},
{
"epoch": 0.12,
"grad_norm": 0.18813055753707886,
"learning_rate": 9.80551693410381e-06,
"loss": 0.5199,
"step": 541
},
{
"epoch": 0.12,
"grad_norm": 0.1620221734046936,
"learning_rate": 9.804552111485568e-06,
"loss": 0.4961,
"step": 542
},
{
"epoch": 0.12,
"grad_norm": 0.2016637921333313,
"learning_rate": 9.8035849492671e-06,
"loss": 0.5042,
"step": 543
},
{
"epoch": 0.12,
"grad_norm": 0.15297263860702515,
"learning_rate": 9.80261544791937e-06,
"loss": 0.5561,
"step": 544
},
{
"epoch": 0.12,
"grad_norm": 0.16937948763370514,
"learning_rate": 9.801643607914485e-06,
"loss": 0.5356,
"step": 545
},
{
"epoch": 0.12,
"grad_norm": 0.1809961050748825,
"learning_rate": 9.80066942972568e-06,
"loss": 0.5516,
"step": 546
},
{
"epoch": 0.12,
"grad_norm": 0.14697958528995514,
"learning_rate": 9.799692913827342e-06,
"loss": 0.5072,
"step": 547
},
{
"epoch": 0.12,
"grad_norm": 0.18541914224624634,
"learning_rate": 9.798714060694988e-06,
"loss": 0.4925,
"step": 548
},
{
"epoch": 0.12,
"grad_norm": 0.19727596640586853,
"learning_rate": 9.797732870805273e-06,
"loss": 0.5206,
"step": 549
},
{
"epoch": 0.12,
"grad_norm": 0.15478399395942688,
"learning_rate": 9.796749344635996e-06,
"loss": 0.5122,
"step": 550
},
{
"epoch": 0.12,
"grad_norm": 0.1174599900841713,
"learning_rate": 9.79576348266609e-06,
"loss": 0.5003,
"step": 551
},
{
"epoch": 0.12,
"grad_norm": 0.16585126519203186,
"learning_rate": 9.794775285375623e-06,
"loss": 0.5029,
"step": 552
},
{
"epoch": 0.12,
"grad_norm": 0.17350535094738007,
"learning_rate": 9.793784753245802e-06,
"loss": 0.548,
"step": 553
},
{
"epoch": 0.12,
"grad_norm": 0.18735840916633606,
"learning_rate": 9.792791886758976e-06,
"loss": 0.5455,
"step": 554
},
{
"epoch": 0.12,
"grad_norm": 0.22835886478424072,
"learning_rate": 9.79179668639862e-06,
"loss": 0.4881,
"step": 555
},
{
"epoch": 0.12,
"grad_norm": 0.16034086048603058,
"learning_rate": 9.790799152649356e-06,
"loss": 0.5222,
"step": 556
},
{
"epoch": 0.12,
"grad_norm": 0.20814630389213562,
"learning_rate": 9.789799285996937e-06,
"loss": 0.5489,
"step": 557
},
{
"epoch": 0.12,
"grad_norm": 0.17920532822608948,
"learning_rate": 9.788797086928252e-06,
"loss": 0.493,
"step": 558
},
{
"epoch": 0.12,
"grad_norm": 0.21627596020698547,
"learning_rate": 9.787792555931328e-06,
"loss": 0.5491,
"step": 559
},
{
"epoch": 0.12,
"grad_norm": 0.14485371112823486,
"learning_rate": 9.786785693495327e-06,
"loss": 0.5144,
"step": 560
},
{
"epoch": 0.12,
"grad_norm": 0.1546938121318817,
"learning_rate": 9.785776500110542e-06,
"loss": 0.4812,
"step": 561
},
{
"epoch": 0.12,
"grad_norm": 0.1761971116065979,
"learning_rate": 9.784764976268408e-06,
"loss": 0.5788,
"step": 562
},
{
"epoch": 0.12,
"grad_norm": 0.18302011489868164,
"learning_rate": 9.78375112246149e-06,
"loss": 0.5186,
"step": 563
},
{
"epoch": 0.12,
"grad_norm": 0.17190533876419067,
"learning_rate": 9.78273493918349e-06,
"loss": 0.5252,
"step": 564
},
{
"epoch": 0.12,
"grad_norm": 0.1821742206811905,
"learning_rate": 9.781716426929243e-06,
"loss": 0.5174,
"step": 565
},
{
"epoch": 0.12,
"grad_norm": 0.19061587750911713,
"learning_rate": 9.780695586194719e-06,
"loss": 0.5662,
"step": 566
},
{
"epoch": 0.12,
"grad_norm": 0.15308646857738495,
"learning_rate": 9.77967241747702e-06,
"loss": 0.5297,
"step": 567
},
{
"epoch": 0.12,
"grad_norm": 0.16192299127578735,
"learning_rate": 9.778646921274385e-06,
"loss": 0.5846,
"step": 568
},
{
"epoch": 0.12,
"grad_norm": 0.1472279578447342,
"learning_rate": 9.777619098086181e-06,
"loss": 0.5596,
"step": 569
},
{
"epoch": 0.12,
"grad_norm": 0.20292969048023224,
"learning_rate": 9.776588948412917e-06,
"loss": 0.5179,
"step": 570
},
{
"epoch": 0.12,
"grad_norm": 0.16102533042430878,
"learning_rate": 9.775556472756226e-06,
"loss": 0.4919,
"step": 571
},
{
"epoch": 0.12,
"grad_norm": 0.18485024571418762,
"learning_rate": 9.774521671618877e-06,
"loss": 0.5455,
"step": 572
},
{
"epoch": 0.12,
"grad_norm": 0.1821717470884323,
"learning_rate": 9.773484545504771e-06,
"loss": 0.5091,
"step": 573
},
{
"epoch": 0.12,
"grad_norm": 0.16444329917430878,
"learning_rate": 9.772445094918944e-06,
"loss": 0.5218,
"step": 574
},
{
"epoch": 0.12,
"grad_norm": 0.17274467647075653,
"learning_rate": 9.771403320367558e-06,
"loss": 0.5823,
"step": 575
},
{
"epoch": 0.12,
"grad_norm": 0.15213851630687714,
"learning_rate": 9.770359222357914e-06,
"loss": 0.4696,
"step": 576
},
{
"epoch": 0.12,
"grad_norm": 0.13404731452465057,
"learning_rate": 9.76931280139844e-06,
"loss": 0.5365,
"step": 577
},
{
"epoch": 0.12,
"grad_norm": 0.1744057685136795,
"learning_rate": 9.768264057998693e-06,
"loss": 0.5559,
"step": 578
},
{
"epoch": 0.12,
"grad_norm": 0.17314410209655762,
"learning_rate": 9.767212992669368e-06,
"loss": 0.5614,
"step": 579
},
{
"epoch": 0.12,
"grad_norm": 0.1846940666437149,
"learning_rate": 9.766159605922282e-06,
"loss": 0.5122,
"step": 580
},
{
"epoch": 0.13,
"grad_norm": 0.15393884479999542,
"learning_rate": 9.76510389827039e-06,
"loss": 0.5734,
"step": 581
},
{
"epoch": 0.13,
"grad_norm": 0.1504923403263092,
"learning_rate": 9.764045870227772e-06,
"loss": 0.5111,
"step": 582
},
{
"epoch": 0.13,
"grad_norm": 0.16151262819766998,
"learning_rate": 9.762985522309642e-06,
"loss": 0.4965,
"step": 583
},
{
"epoch": 0.13,
"grad_norm": 0.15211625397205353,
"learning_rate": 9.761922855032339e-06,
"loss": 0.5263,
"step": 584
},
{
"epoch": 0.13,
"grad_norm": 0.17688104510307312,
"learning_rate": 9.760857868913335e-06,
"loss": 0.4846,
"step": 585
},
{
"epoch": 0.13,
"grad_norm": 0.1481778621673584,
"learning_rate": 9.759790564471233e-06,
"loss": 0.5189,
"step": 586
},
{
"epoch": 0.13,
"grad_norm": 0.1728227287530899,
"learning_rate": 9.758720942225759e-06,
"loss": 0.4878,
"step": 587
},
{
"epoch": 0.13,
"grad_norm": 0.15571308135986328,
"learning_rate": 9.757649002697771e-06,
"loss": 0.5456,
"step": 588
},
{
"epoch": 0.13,
"grad_norm": 0.15774881839752197,
"learning_rate": 9.756574746409258e-06,
"loss": 0.522,
"step": 589
},
{
"epoch": 0.13,
"grad_norm": 0.18842703104019165,
"learning_rate": 9.755498173883331e-06,
"loss": 0.442,
"step": 590
},
{
"epoch": 0.13,
"grad_norm": 0.1557362824678421,
"learning_rate": 9.754419285644233e-06,
"loss": 0.5149,
"step": 591
},
{
"epoch": 0.13,
"grad_norm": 0.15488624572753906,
"learning_rate": 9.753338082217334e-06,
"loss": 0.5567,
"step": 592
},
{
"epoch": 0.13,
"grad_norm": 0.14816376566886902,
"learning_rate": 9.752254564129134e-06,
"loss": 0.5244,
"step": 593
},
{
"epoch": 0.13,
"grad_norm": 0.144754558801651,
"learning_rate": 9.751168731907253e-06,
"loss": 0.4777,
"step": 594
},
{
"epoch": 0.13,
"grad_norm": 0.2727169096469879,
"learning_rate": 9.750080586080445e-06,
"loss": 0.5165,
"step": 595
},
{
"epoch": 0.13,
"grad_norm": 0.21706987917423248,
"learning_rate": 9.748990127178589e-06,
"loss": 0.5346,
"step": 596
},
{
"epoch": 0.13,
"grad_norm": 0.20381216704845428,
"learning_rate": 9.747897355732684e-06,
"loss": 0.5546,
"step": 597
},
{
"epoch": 0.13,
"grad_norm": 0.1514424830675125,
"learning_rate": 9.746802272274868e-06,
"loss": 0.5593,
"step": 598
},
{
"epoch": 0.13,
"grad_norm": 0.16018468141555786,
"learning_rate": 9.745704877338393e-06,
"loss": 0.5303,
"step": 599
},
{
"epoch": 0.13,
"grad_norm": 0.1491565853357315,
"learning_rate": 9.74460517145764e-06,
"loss": 0.5265,
"step": 600
},
{
"epoch": 0.13,
"grad_norm": 0.1546594202518463,
"learning_rate": 9.743503155168119e-06,
"loss": 0.5193,
"step": 601
},
{
"epoch": 0.13,
"grad_norm": 0.18948593735694885,
"learning_rate": 9.74239882900646e-06,
"loss": 0.5794,
"step": 602
},
{
"epoch": 0.13,
"grad_norm": 0.16691826283931732,
"learning_rate": 9.74129219351042e-06,
"loss": 0.5327,
"step": 603
},
{
"epoch": 0.13,
"grad_norm": 0.15864987671375275,
"learning_rate": 9.740183249218883e-06,
"loss": 0.5189,
"step": 604
},
{
"epoch": 0.13,
"grad_norm": 0.17005395889282227,
"learning_rate": 9.739071996671851e-06,
"loss": 0.5345,
"step": 605
},
{
"epoch": 0.13,
"grad_norm": 0.16413751244544983,
"learning_rate": 9.737958436410459e-06,
"loss": 0.5135,
"step": 606
},
{
"epoch": 0.13,
"grad_norm": 0.16234463453292847,
"learning_rate": 9.736842568976957e-06,
"loss": 0.523,
"step": 607
},
{
"epoch": 0.13,
"grad_norm": 0.24228431284427643,
"learning_rate": 9.73572439491472e-06,
"loss": 0.5346,
"step": 608
},
{
"epoch": 0.13,
"grad_norm": 0.16661712527275085,
"learning_rate": 9.734603914768254e-06,
"loss": 0.5846,
"step": 609
},
{
"epoch": 0.13,
"grad_norm": 0.1640872210264206,
"learning_rate": 9.73348112908318e-06,
"loss": 0.5245,
"step": 610
},
{
"epoch": 0.13,
"grad_norm": 0.15542039275169373,
"learning_rate": 9.732356038406242e-06,
"loss": 0.5418,
"step": 611
},
{
"epoch": 0.13,
"grad_norm": 0.16618283092975616,
"learning_rate": 9.73122864328531e-06,
"loss": 0.5491,
"step": 612
},
{
"epoch": 0.13,
"grad_norm": 0.15610603988170624,
"learning_rate": 9.730098944269377e-06,
"loss": 0.5672,
"step": 613
},
{
"epoch": 0.13,
"grad_norm": 0.16698190569877625,
"learning_rate": 9.72896694190855e-06,
"loss": 0.5467,
"step": 614
},
{
"epoch": 0.13,
"grad_norm": 0.16580110788345337,
"learning_rate": 9.727832636754066e-06,
"loss": 0.5943,
"step": 615
},
{
"epoch": 0.13,
"grad_norm": 0.17117217183113098,
"learning_rate": 9.726696029358283e-06,
"loss": 0.5022,
"step": 616
},
{
"epoch": 0.13,
"grad_norm": 0.18671591579914093,
"learning_rate": 9.725557120274673e-06,
"loss": 0.544,
"step": 617
},
{
"epoch": 0.13,
"grad_norm": 0.1708557903766632,
"learning_rate": 9.724415910057839e-06,
"loss": 0.5172,
"step": 618
},
{
"epoch": 0.13,
"grad_norm": 0.17382651567459106,
"learning_rate": 9.723272399263492e-06,
"loss": 0.5278,
"step": 619
},
{
"epoch": 0.13,
"grad_norm": 0.20800824463367462,
"learning_rate": 9.722126588448473e-06,
"loss": 0.5484,
"step": 620
},
{
"epoch": 0.13,
"grad_norm": 0.1626998484134674,
"learning_rate": 9.720978478170745e-06,
"loss": 0.5248,
"step": 621
},
{
"epoch": 0.13,
"grad_norm": 0.20097678899765015,
"learning_rate": 9.719828068989378e-06,
"loss": 0.4871,
"step": 622
},
{
"epoch": 0.13,
"grad_norm": 0.17374666035175323,
"learning_rate": 9.718675361464574e-06,
"loss": 0.5118,
"step": 623
},
{
"epoch": 0.13,
"grad_norm": 0.16872042417526245,
"learning_rate": 9.717520356157648e-06,
"loss": 0.5554,
"step": 624
},
{
"epoch": 0.13,
"grad_norm": 0.1880810260772705,
"learning_rate": 9.716363053631039e-06,
"loss": 0.4936,
"step": 625
},
{
"epoch": 0.13,
"grad_norm": 0.20248694717884064,
"learning_rate": 9.715203454448297e-06,
"loss": 0.5005,
"step": 626
},
{
"epoch": 0.14,
"grad_norm": 0.20392434298992157,
"learning_rate": 9.714041559174095e-06,
"loss": 0.5389,
"step": 627
},
{
"epoch": 0.14,
"grad_norm": 0.16137023270130157,
"learning_rate": 9.712877368374226e-06,
"loss": 0.5599,
"step": 628
},
{
"epoch": 0.14,
"grad_norm": 0.17454127967357635,
"learning_rate": 9.711710882615595e-06,
"loss": 0.5127,
"step": 629
},
{
"epoch": 0.14,
"grad_norm": 0.13746435940265656,
"learning_rate": 9.710542102466229e-06,
"loss": 0.5617,
"step": 630
},
{
"epoch": 0.14,
"grad_norm": 0.16572695970535278,
"learning_rate": 9.709371028495276e-06,
"loss": 0.5421,
"step": 631
},
{
"epoch": 0.14,
"grad_norm": 0.21156027913093567,
"learning_rate": 9.708197661272989e-06,
"loss": 0.5373,
"step": 632
},
{
"epoch": 0.14,
"grad_norm": 0.1574900895357132,
"learning_rate": 9.707022001370749e-06,
"loss": 0.526,
"step": 633
},
{
"epoch": 0.14,
"grad_norm": 0.14249767363071442,
"learning_rate": 9.70584404936105e-06,
"loss": 0.5104,
"step": 634
},
{
"epoch": 0.14,
"grad_norm": 0.18953874707221985,
"learning_rate": 9.704663805817499e-06,
"loss": 0.54,
"step": 635
},
{
"epoch": 0.14,
"grad_norm": 0.17047786712646484,
"learning_rate": 9.703481271314823e-06,
"loss": 0.5185,
"step": 636
},
{
"epoch": 0.14,
"grad_norm": 0.19651903212070465,
"learning_rate": 9.702296446428863e-06,
"loss": 0.5147,
"step": 637
},
{
"epoch": 0.14,
"grad_norm": 0.13926255702972412,
"learning_rate": 9.701109331736573e-06,
"loss": 0.5381,
"step": 638
},
{
"epoch": 0.14,
"grad_norm": 0.16642118990421295,
"learning_rate": 9.699919927816027e-06,
"loss": 0.5114,
"step": 639
},
{
"epoch": 0.14,
"grad_norm": 0.19338329136371613,
"learning_rate": 9.69872823524641e-06,
"loss": 0.5317,
"step": 640
},
{
"epoch": 0.14,
"grad_norm": 0.26251357793807983,
"learning_rate": 9.697534254608024e-06,
"loss": 0.5122,
"step": 641
},
{
"epoch": 0.14,
"grad_norm": 0.1683933585882187,
"learning_rate": 9.69633798648228e-06,
"loss": 0.5429,
"step": 642
},
{
"epoch": 0.14,
"grad_norm": 0.20809942483901978,
"learning_rate": 9.695139431451712e-06,
"loss": 0.537,
"step": 643
},
{
"epoch": 0.14,
"grad_norm": 0.1489880532026291,
"learning_rate": 9.693938590099958e-06,
"loss": 0.5049,
"step": 644
},
{
"epoch": 0.14,
"grad_norm": 0.14825065433979034,
"learning_rate": 9.692735463011774e-06,
"loss": 0.496,
"step": 645
},
{
"epoch": 0.14,
"grad_norm": 0.17281201481819153,
"learning_rate": 9.691530050773031e-06,
"loss": 0.524,
"step": 646
},
{
"epoch": 0.14,
"grad_norm": 0.227024644613266,
"learning_rate": 9.690322353970708e-06,
"loss": 0.5191,
"step": 647
},
{
"epoch": 0.14,
"grad_norm": 0.16183902323246002,
"learning_rate": 9.689112373192899e-06,
"loss": 0.5557,
"step": 648
},
{
"epoch": 0.14,
"grad_norm": 0.19648505747318268,
"learning_rate": 9.687900109028813e-06,
"loss": 0.4963,
"step": 649
},
{
"epoch": 0.14,
"grad_norm": 0.1439117044210434,
"learning_rate": 9.686685562068765e-06,
"loss": 0.5512,
"step": 650
},
{
"epoch": 0.14,
"grad_norm": 0.15333153307437897,
"learning_rate": 9.685468732904187e-06,
"loss": 0.4566,
"step": 651
},
{
"epoch": 0.14,
"grad_norm": 0.16783007979393005,
"learning_rate": 9.684249622127616e-06,
"loss": 0.5197,
"step": 652
},
{
"epoch": 0.14,
"grad_norm": 0.1759708672761917,
"learning_rate": 9.683028230332707e-06,
"loss": 0.5086,
"step": 653
},
{
"epoch": 0.14,
"grad_norm": 0.18851730227470398,
"learning_rate": 9.681804558114222e-06,
"loss": 0.5563,
"step": 654
},
{
"epoch": 0.14,
"grad_norm": 0.18417790532112122,
"learning_rate": 9.680578606068037e-06,
"loss": 0.5028,
"step": 655
},
{
"epoch": 0.14,
"grad_norm": 0.1564049869775772,
"learning_rate": 9.67935037479113e-06,
"loss": 0.5071,
"step": 656
},
{
"epoch": 0.14,
"grad_norm": 0.18582746386528015,
"learning_rate": 9.678119864881597e-06,
"loss": 0.4922,
"step": 657
},
{
"epoch": 0.14,
"grad_norm": 0.16415338218212128,
"learning_rate": 9.676887076938642e-06,
"loss": 0.5226,
"step": 658
},
{
"epoch": 0.14,
"grad_norm": 0.19364799559116364,
"learning_rate": 9.675652011562576e-06,
"loss": 0.5294,
"step": 659
},
{
"epoch": 0.14,
"grad_norm": 0.19111143052577972,
"learning_rate": 9.674414669354819e-06,
"loss": 0.5486,
"step": 660
},
{
"epoch": 0.14,
"grad_norm": 0.1607770472764969,
"learning_rate": 9.673175050917902e-06,
"loss": 0.5674,
"step": 661
},
{
"epoch": 0.14,
"grad_norm": 0.16375142335891724,
"learning_rate": 9.671933156855464e-06,
"loss": 0.5305,
"step": 662
},
{
"epoch": 0.14,
"grad_norm": 0.2223857343196869,
"learning_rate": 9.67068898777225e-06,
"loss": 0.5073,
"step": 663
},
{
"epoch": 0.14,
"grad_norm": 0.15344950556755066,
"learning_rate": 9.669442544274115e-06,
"loss": 0.5176,
"step": 664
},
{
"epoch": 0.14,
"grad_norm": 0.1769074946641922,
"learning_rate": 9.66819382696802e-06,
"loss": 0.4888,
"step": 665
},
{
"epoch": 0.14,
"grad_norm": 0.14724504947662354,
"learning_rate": 9.666942836462036e-06,
"loss": 0.5251,
"step": 666
},
{
"epoch": 0.14,
"grad_norm": 0.14657099545001984,
"learning_rate": 9.665689573365336e-06,
"loss": 0.5271,
"step": 667
},
{
"epoch": 0.14,
"grad_norm": 0.16149552166461945,
"learning_rate": 9.664434038288207e-06,
"loss": 0.521,
"step": 668
},
{
"epoch": 0.14,
"grad_norm": 0.17453457415103912,
"learning_rate": 9.663176231842034e-06,
"loss": 0.5071,
"step": 669
},
{
"epoch": 0.14,
"grad_norm": 0.1480516791343689,
"learning_rate": 9.661916154639312e-06,
"loss": 0.598,
"step": 670
},
{
"epoch": 0.14,
"grad_norm": 0.17226625978946686,
"learning_rate": 9.660653807293643e-06,
"loss": 0.534,
"step": 671
},
{
"epoch": 0.14,
"grad_norm": 0.13685333728790283,
"learning_rate": 9.659389190419735e-06,
"loss": 0.5049,
"step": 672
},
{
"epoch": 0.14,
"grad_norm": 0.1628991812467575,
"learning_rate": 9.658122304633395e-06,
"loss": 0.5246,
"step": 673
},
{
"epoch": 0.15,
"grad_norm": 0.1640816479921341,
"learning_rate": 9.656853150551543e-06,
"loss": 0.5104,
"step": 674
},
{
"epoch": 0.15,
"grad_norm": 0.16424889862537384,
"learning_rate": 9.6555817287922e-06,
"loss": 0.5173,
"step": 675
},
{
"epoch": 0.15,
"grad_norm": 0.26323530077934265,
"learning_rate": 9.654308039974489e-06,
"loss": 0.5144,
"step": 676
},
{
"epoch": 0.15,
"grad_norm": 0.13345208764076233,
"learning_rate": 9.65303208471864e-06,
"loss": 0.5294,
"step": 677
},
{
"epoch": 0.15,
"grad_norm": 0.179282546043396,
"learning_rate": 9.651753863645985e-06,
"loss": 0.5211,
"step": 678
},
{
"epoch": 0.15,
"grad_norm": 0.1983976811170578,
"learning_rate": 9.650473377378961e-06,
"loss": 0.5435,
"step": 679
},
{
"epoch": 0.15,
"grad_norm": 0.18049369752407074,
"learning_rate": 9.649190626541105e-06,
"loss": 0.533,
"step": 680
},
{
"epoch": 0.15,
"grad_norm": 0.16596846282482147,
"learning_rate": 9.647905611757062e-06,
"loss": 0.5274,
"step": 681
},
{
"epoch": 0.15,
"grad_norm": 0.17268408834934235,
"learning_rate": 9.646618333652574e-06,
"loss": 0.5481,
"step": 682
},
{
"epoch": 0.15,
"grad_norm": 0.168728306889534,
"learning_rate": 9.64532879285449e-06,
"loss": 0.5201,
"step": 683
},
{
"epoch": 0.15,
"grad_norm": 0.2116057574748993,
"learning_rate": 9.644036989990753e-06,
"loss": 0.5107,
"step": 684
},
{
"epoch": 0.15,
"grad_norm": 0.14726531505584717,
"learning_rate": 9.642742925690417e-06,
"loss": 0.5546,
"step": 685
},
{
"epoch": 0.15,
"grad_norm": 0.17111736536026,
"learning_rate": 9.641446600583632e-06,
"loss": 0.5123,
"step": 686
},
{
"epoch": 0.15,
"grad_norm": 0.17838339507579803,
"learning_rate": 9.640148015301651e-06,
"loss": 0.4966,
"step": 687
},
{
"epoch": 0.15,
"grad_norm": 0.17207923531532288,
"learning_rate": 9.638847170476824e-06,
"loss": 0.5189,
"step": 688
},
{
"epoch": 0.15,
"grad_norm": 0.15716849267482758,
"learning_rate": 9.637544066742606e-06,
"loss": 0.5553,
"step": 689
},
{
"epoch": 0.15,
"grad_norm": 0.19608205556869507,
"learning_rate": 9.636238704733547e-06,
"loss": 0.5691,
"step": 690
},
{
"epoch": 0.15,
"grad_norm": 0.15424737334251404,
"learning_rate": 9.634931085085301e-06,
"loss": 0.5419,
"step": 691
},
{
"epoch": 0.15,
"grad_norm": 0.24781200289726257,
"learning_rate": 9.633621208434623e-06,
"loss": 0.5374,
"step": 692
},
{
"epoch": 0.15,
"grad_norm": 0.1594979614019394,
"learning_rate": 9.63230907541936e-06,
"loss": 0.5093,
"step": 693
},
{
"epoch": 0.15,
"grad_norm": 0.1622641682624817,
"learning_rate": 9.630994686678462e-06,
"loss": 0.5247,
"step": 694
},
{
"epoch": 0.15,
"grad_norm": 0.19124239683151245,
"learning_rate": 9.629678042851976e-06,
"loss": 0.5241,
"step": 695
},
{
"epoch": 0.15,
"grad_norm": 0.1495082974433899,
"learning_rate": 9.628359144581052e-06,
"loss": 0.5295,
"step": 696
},
{
"epoch": 0.15,
"grad_norm": 0.1647813469171524,
"learning_rate": 9.627037992507931e-06,
"loss": 0.494,
"step": 697
},
{
"epoch": 0.15,
"grad_norm": 0.16081197559833527,
"learning_rate": 9.625714587275954e-06,
"loss": 0.5414,
"step": 698
},
{
"epoch": 0.15,
"grad_norm": 0.14257070422172546,
"learning_rate": 9.624388929529563e-06,
"loss": 0.5634,
"step": 699
},
{
"epoch": 0.15,
"grad_norm": 0.1383073329925537,
"learning_rate": 9.623061019914291e-06,
"loss": 0.4961,
"step": 700
},
{
"epoch": 0.15,
"grad_norm": 0.1932617723941803,
"learning_rate": 9.621730859076768e-06,
"loss": 0.522,
"step": 701
},
{
"epoch": 0.15,
"grad_norm": 0.20005308091640472,
"learning_rate": 9.620398447664727e-06,
"loss": 0.522,
"step": 702
},
{
"epoch": 0.15,
"grad_norm": 0.17601189017295837,
"learning_rate": 9.61906378632699e-06,
"loss": 0.5707,
"step": 703
},
{
"epoch": 0.15,
"grad_norm": 0.14197023212909698,
"learning_rate": 9.617726875713477e-06,
"loss": 0.5194,
"step": 704
},
{
"epoch": 0.15,
"grad_norm": 0.17921584844589233,
"learning_rate": 9.616387716475203e-06,
"loss": 0.5067,
"step": 705
},
{
"epoch": 0.15,
"grad_norm": 0.1330891251564026,
"learning_rate": 9.615046309264278e-06,
"loss": 0.4925,
"step": 706
},
{
"epoch": 0.15,
"grad_norm": 0.19038861989974976,
"learning_rate": 9.613702654733908e-06,
"loss": 0.5745,
"step": 707
},
{
"epoch": 0.15,
"grad_norm": 0.2451518177986145,
"learning_rate": 9.612356753538392e-06,
"loss": 0.5799,
"step": 708
},
{
"epoch": 0.15,
"grad_norm": 0.20882856845855713,
"learning_rate": 9.611008606333121e-06,
"loss": 0.4886,
"step": 709
},
{
"epoch": 0.15,
"grad_norm": 0.170186385512352,
"learning_rate": 9.609658213774584e-06,
"loss": 0.5118,
"step": 710
},
{
"epoch": 0.15,
"grad_norm": 0.15260860323905945,
"learning_rate": 9.608305576520361e-06,
"loss": 0.5166,
"step": 711
},
{
"epoch": 0.15,
"grad_norm": 0.16833122074604034,
"learning_rate": 9.606950695229125e-06,
"loss": 0.5003,
"step": 712
},
{
"epoch": 0.15,
"grad_norm": 0.17692722380161285,
"learning_rate": 9.605593570560642e-06,
"loss": 0.5378,
"step": 713
},
{
"epoch": 0.15,
"grad_norm": 0.2011829912662506,
"learning_rate": 9.60423420317577e-06,
"loss": 0.531,
"step": 714
},
{
"epoch": 0.15,
"grad_norm": 0.1459263414144516,
"learning_rate": 9.602872593736461e-06,
"loss": 0.5278,
"step": 715
},
{
"epoch": 0.15,
"grad_norm": 0.15884311497211456,
"learning_rate": 9.601508742905757e-06,
"loss": 0.5615,
"step": 716
},
{
"epoch": 0.15,
"grad_norm": 0.2560180127620697,
"learning_rate": 9.600142651347792e-06,
"loss": 0.5295,
"step": 717
},
{
"epoch": 0.15,
"grad_norm": 0.15647375583648682,
"learning_rate": 9.59877431972779e-06,
"loss": 0.5028,
"step": 718
},
{
"epoch": 0.15,
"grad_norm": 0.21782688796520233,
"learning_rate": 9.597403748712067e-06,
"loss": 0.4902,
"step": 719
},
{
"epoch": 0.16,
"grad_norm": 0.16878049075603485,
"learning_rate": 9.596030938968028e-06,
"loss": 0.5524,
"step": 720
},
{
"epoch": 0.16,
"grad_norm": 0.1529654562473297,
"learning_rate": 9.594655891164174e-06,
"loss": 0.4946,
"step": 721
},
{
"epoch": 0.16,
"grad_norm": 0.2102820873260498,
"learning_rate": 9.593278605970086e-06,
"loss": 0.5093,
"step": 722
},
{
"epoch": 0.16,
"grad_norm": 0.13754625618457794,
"learning_rate": 9.591899084056444e-06,
"loss": 0.55,
"step": 723
},
{
"epoch": 0.16,
"grad_norm": 0.20235078036785126,
"learning_rate": 9.590517326095012e-06,
"loss": 0.5277,
"step": 724
},
{
"epoch": 0.16,
"grad_norm": 0.20487360656261444,
"learning_rate": 9.58913333275864e-06,
"loss": 0.5274,
"step": 725
},
{
"epoch": 0.16,
"grad_norm": 0.15242727100849152,
"learning_rate": 9.587747104721275e-06,
"loss": 0.5361,
"step": 726
},
{
"epoch": 0.16,
"grad_norm": 0.16651783883571625,
"learning_rate": 9.586358642657946e-06,
"loss": 0.5422,
"step": 727
},
{
"epoch": 0.16,
"grad_norm": 0.20768210291862488,
"learning_rate": 9.58496794724477e-06,
"loss": 0.5204,
"step": 728
},
{
"epoch": 0.16,
"grad_norm": 0.13769538700580597,
"learning_rate": 9.583575019158954e-06,
"loss": 0.5485,
"step": 729
},
{
"epoch": 0.16,
"grad_norm": 0.2392173558473587,
"learning_rate": 9.582179859078793e-06,
"loss": 0.5178,
"step": 730
},
{
"epoch": 0.16,
"grad_norm": 0.17117203772068024,
"learning_rate": 9.580782467683666e-06,
"loss": 0.4959,
"step": 731
},
{
"epoch": 0.16,
"grad_norm": 0.14463159441947937,
"learning_rate": 9.579382845654038e-06,
"loss": 0.5405,
"step": 732
},
{
"epoch": 0.16,
"grad_norm": 0.15378107130527496,
"learning_rate": 9.577980993671461e-06,
"loss": 0.5239,
"step": 733
},
{
"epoch": 0.16,
"grad_norm": 0.18154248595237732,
"learning_rate": 9.576576912418577e-06,
"loss": 0.5138,
"step": 734
},
{
"epoch": 0.16,
"grad_norm": 0.17718815803527832,
"learning_rate": 9.575170602579109e-06,
"loss": 0.5281,
"step": 735
},
{
"epoch": 0.16,
"grad_norm": 0.18913020193576813,
"learning_rate": 9.573762064837866e-06,
"loss": 0.4653,
"step": 736
},
{
"epoch": 0.16,
"grad_norm": 0.16615386307239532,
"learning_rate": 9.572351299880742e-06,
"loss": 0.4993,
"step": 737
},
{
"epoch": 0.16,
"grad_norm": 0.1711035966873169,
"learning_rate": 9.570938308394717e-06,
"loss": 0.5527,
"step": 738
},
{
"epoch": 0.16,
"grad_norm": 0.1759718656539917,
"learning_rate": 9.569523091067855e-06,
"loss": 0.4892,
"step": 739
},
{
"epoch": 0.16,
"grad_norm": 0.16556698083877563,
"learning_rate": 9.568105648589299e-06,
"loss": 0.512,
"step": 740
},
{
"epoch": 0.16,
"grad_norm": 0.16739937663078308,
"learning_rate": 9.566685981649283e-06,
"loss": 0.5167,
"step": 741
},
{
"epoch": 0.16,
"grad_norm": 0.16000035405158997,
"learning_rate": 9.565264090939122e-06,
"loss": 0.5528,
"step": 742
},
{
"epoch": 0.16,
"grad_norm": 0.2087719887495041,
"learning_rate": 9.563839977151208e-06,
"loss": 0.5447,
"step": 743
},
{
"epoch": 0.16,
"grad_norm": 0.17800335586071014,
"learning_rate": 9.562413640979024e-06,
"loss": 0.5615,
"step": 744
},
{
"epoch": 0.16,
"grad_norm": 0.13852566480636597,
"learning_rate": 9.56098508311713e-06,
"loss": 0.5196,
"step": 745
},
{
"epoch": 0.16,
"grad_norm": 0.15705984830856323,
"learning_rate": 9.55955430426117e-06,
"loss": 0.5286,
"step": 746
},
{
"epoch": 0.16,
"grad_norm": 0.13705521821975708,
"learning_rate": 9.558121305107868e-06,
"loss": 0.4874,
"step": 747
},
{
"epoch": 0.16,
"grad_norm": 0.1593395620584488,
"learning_rate": 9.556686086355032e-06,
"loss": 0.508,
"step": 748
},
{
"epoch": 0.16,
"grad_norm": 0.1956239640712738,
"learning_rate": 9.555248648701546e-06,
"loss": 0.5165,
"step": 749
},
{
"epoch": 0.16,
"grad_norm": 0.15301111340522766,
"learning_rate": 9.553808992847377e-06,
"loss": 0.5279,
"step": 750
},
{
"epoch": 0.16,
"grad_norm": 0.1944187432527542,
"learning_rate": 9.552367119493575e-06,
"loss": 0.5328,
"step": 751
},
{
"epoch": 0.16,
"grad_norm": 0.16133981943130493,
"learning_rate": 9.550923029342266e-06,
"loss": 0.5258,
"step": 752
},
{
"epoch": 0.16,
"grad_norm": 0.1575002670288086,
"learning_rate": 9.549476723096658e-06,
"loss": 0.4785,
"step": 753
},
{
"epoch": 0.16,
"grad_norm": 0.2158762514591217,
"learning_rate": 9.548028201461034e-06,
"loss": 0.5069,
"step": 754
},
{
"epoch": 0.16,
"grad_norm": 0.1875433325767517,
"learning_rate": 9.546577465140763e-06,
"loss": 0.5165,
"step": 755
},
{
"epoch": 0.16,
"grad_norm": 0.15603913366794586,
"learning_rate": 9.545124514842284e-06,
"loss": 0.523,
"step": 756
},
{
"epoch": 0.16,
"grad_norm": 0.15902650356292725,
"learning_rate": 9.543669351273122e-06,
"loss": 0.5527,
"step": 757
},
{
"epoch": 0.16,
"grad_norm": 0.14115546643733978,
"learning_rate": 9.542211975141871e-06,
"loss": 0.515,
"step": 758
},
{
"epoch": 0.16,
"grad_norm": 0.12460020929574966,
"learning_rate": 9.540752387158213e-06,
"loss": 0.5186,
"step": 759
},
{
"epoch": 0.16,
"grad_norm": 0.2988269627094269,
"learning_rate": 9.5392905880329e-06,
"loss": 0.5062,
"step": 760
},
{
"epoch": 0.16,
"grad_norm": 0.1358107179403305,
"learning_rate": 9.537826578477758e-06,
"loss": 0.5129,
"step": 761
},
{
"epoch": 0.16,
"grad_norm": 0.1808885931968689,
"learning_rate": 9.5363603592057e-06,
"loss": 0.5442,
"step": 762
},
{
"epoch": 0.16,
"grad_norm": 0.16095423698425293,
"learning_rate": 9.534891930930705e-06,
"loss": 0.5632,
"step": 763
},
{
"epoch": 0.16,
"grad_norm": 0.14927184581756592,
"learning_rate": 9.53342129436783e-06,
"loss": 0.5345,
"step": 764
},
{
"epoch": 0.16,
"grad_norm": 0.17672008275985718,
"learning_rate": 9.531948450233213e-06,
"loss": 0.5667,
"step": 765
},
{
"epoch": 0.17,
"grad_norm": 0.17709845304489136,
"learning_rate": 9.530473399244061e-06,
"loss": 0.5354,
"step": 766
},
{
"epoch": 0.17,
"grad_norm": 0.16679351031780243,
"learning_rate": 9.528996142118654e-06,
"loss": 0.5584,
"step": 767
},
{
"epoch": 0.17,
"grad_norm": 0.21075226366519928,
"learning_rate": 9.527516679576353e-06,
"loss": 0.4759,
"step": 768
},
{
"epoch": 0.17,
"grad_norm": 0.15864352881908417,
"learning_rate": 9.526035012337591e-06,
"loss": 0.5861,
"step": 769
},
{
"epoch": 0.17,
"grad_norm": 0.18424198031425476,
"learning_rate": 9.52455114112387e-06,
"loss": 0.5176,
"step": 770
},
{
"epoch": 0.17,
"grad_norm": 0.14614816009998322,
"learning_rate": 9.523065066657769e-06,
"loss": 0.5267,
"step": 771
},
{
"epoch": 0.17,
"grad_norm": 0.18655577301979065,
"learning_rate": 9.52157678966294e-06,
"loss": 0.5034,
"step": 772
},
{
"epoch": 0.17,
"grad_norm": 0.1492408663034439,
"learning_rate": 9.520086310864104e-06,
"loss": 0.5242,
"step": 773
},
{
"epoch": 0.17,
"grad_norm": 0.18119966983795166,
"learning_rate": 9.518593630987063e-06,
"loss": 0.503,
"step": 774
},
{
"epoch": 0.17,
"grad_norm": 0.2733058035373688,
"learning_rate": 9.51709875075868e-06,
"loss": 0.5293,
"step": 775
},
{
"epoch": 0.17,
"grad_norm": 0.15847504138946533,
"learning_rate": 9.515601670906895e-06,
"loss": 0.5012,
"step": 776
},
{
"epoch": 0.17,
"grad_norm": 0.17875181138515472,
"learning_rate": 9.51410239216072e-06,
"loss": 0.4895,
"step": 777
},
{
"epoch": 0.17,
"grad_norm": 0.19667181372642517,
"learning_rate": 9.512600915250232e-06,
"loss": 0.5493,
"step": 778
},
{
"epoch": 0.17,
"grad_norm": 0.1711205095052719,
"learning_rate": 9.511097240906588e-06,
"loss": 0.4674,
"step": 779
},
{
"epoch": 0.17,
"grad_norm": 0.18481481075286865,
"learning_rate": 9.509591369862007e-06,
"loss": 0.5166,
"step": 780
},
{
"epoch": 0.17,
"grad_norm": 0.15598368644714355,
"learning_rate": 9.50808330284978e-06,
"loss": 0.5697,
"step": 781
},
{
"epoch": 0.17,
"grad_norm": 0.19259214401245117,
"learning_rate": 9.506573040604268e-06,
"loss": 0.5114,
"step": 782
},
{
"epoch": 0.17,
"grad_norm": 0.14538073539733887,
"learning_rate": 9.5050605838609e-06,
"loss": 0.5485,
"step": 783
},
{
"epoch": 0.17,
"grad_norm": 0.18423911929130554,
"learning_rate": 9.503545933356175e-06,
"loss": 0.5254,
"step": 784
},
{
"epoch": 0.17,
"grad_norm": 0.1563284546136856,
"learning_rate": 9.50202908982766e-06,
"loss": 0.5266,
"step": 785
},
{
"epoch": 0.17,
"grad_norm": 0.16368651390075684,
"learning_rate": 9.500510054013989e-06,
"loss": 0.5289,
"step": 786
},
{
"epoch": 0.17,
"grad_norm": 0.16315564513206482,
"learning_rate": 9.498988826654863e-06,
"loss": 0.4904,
"step": 787
},
{
"epoch": 0.17,
"grad_norm": 0.15771108865737915,
"learning_rate": 9.49746540849105e-06,
"loss": 0.5132,
"step": 788
},
{
"epoch": 0.17,
"grad_norm": 0.19994409382343292,
"learning_rate": 9.49593980026439e-06,
"loss": 0.5498,
"step": 789
},
{
"epoch": 0.17,
"grad_norm": 0.13863793015480042,
"learning_rate": 9.494412002717784e-06,
"loss": 0.5206,
"step": 790
},
{
"epoch": 0.17,
"grad_norm": 0.17389997839927673,
"learning_rate": 9.4928820165952e-06,
"loss": 0.4742,
"step": 791
},
{
"epoch": 0.17,
"grad_norm": 0.15407484769821167,
"learning_rate": 9.49134984264167e-06,
"loss": 0.4783,
"step": 792
},
{
"epoch": 0.17,
"grad_norm": 0.15034940838813782,
"learning_rate": 9.489815481603297e-06,
"loss": 0.5066,
"step": 793
},
{
"epoch": 0.17,
"grad_norm": 0.14711235463619232,
"learning_rate": 9.488278934227242e-06,
"loss": 0.5068,
"step": 794
},
{
"epoch": 0.17,
"grad_norm": 0.17346839606761932,
"learning_rate": 9.48674020126174e-06,
"loss": 0.536,
"step": 795
},
{
"epoch": 0.17,
"grad_norm": 0.14369408786296844,
"learning_rate": 9.485199283456078e-06,
"loss": 0.4971,
"step": 796
},
{
"epoch": 0.17,
"grad_norm": 0.1965474933385849,
"learning_rate": 9.483656181560618e-06,
"loss": 0.5791,
"step": 797
},
{
"epoch": 0.17,
"grad_norm": 0.17605896294116974,
"learning_rate": 9.48211089632678e-06,
"loss": 0.5551,
"step": 798
},
{
"epoch": 0.17,
"grad_norm": 0.1731802225112915,
"learning_rate": 9.480563428507045e-06,
"loss": 0.4776,
"step": 799
},
{
"epoch": 0.17,
"grad_norm": 0.17883409559726715,
"learning_rate": 9.479013778854966e-06,
"loss": 0.5357,
"step": 800
},
{
"epoch": 0.17,
"grad_norm": 0.1549665927886963,
"learning_rate": 9.477461948125149e-06,
"loss": 0.4987,
"step": 801
},
{
"epoch": 0.17,
"grad_norm": 0.23310746252536774,
"learning_rate": 9.475907937073265e-06,
"loss": 0.5242,
"step": 802
},
{
"epoch": 0.17,
"grad_norm": 0.21235214173793793,
"learning_rate": 9.474351746456048e-06,
"loss": 0.4909,
"step": 803
},
{
"epoch": 0.17,
"grad_norm": 0.16170482337474823,
"learning_rate": 9.472793377031293e-06,
"loss": 0.4607,
"step": 804
},
{
"epoch": 0.17,
"grad_norm": 0.21534408628940582,
"learning_rate": 9.471232829557857e-06,
"loss": 0.5182,
"step": 805
},
{
"epoch": 0.17,
"grad_norm": 0.155525341629982,
"learning_rate": 9.469670104795655e-06,
"loss": 0.5337,
"step": 806
},
{
"epoch": 0.17,
"grad_norm": 0.1875993311405182,
"learning_rate": 9.468105203505661e-06,
"loss": 0.4955,
"step": 807
},
{
"epoch": 0.17,
"grad_norm": 0.1549602895975113,
"learning_rate": 9.466538126449915e-06,
"loss": 0.5879,
"step": 808
},
{
"epoch": 0.17,
"grad_norm": 0.22798140347003937,
"learning_rate": 9.464968874391511e-06,
"loss": 0.539,
"step": 809
},
{
"epoch": 0.17,
"grad_norm": 0.1601991057395935,
"learning_rate": 9.463397448094605e-06,
"loss": 0.4695,
"step": 810
},
{
"epoch": 0.17,
"grad_norm": 0.16516649723052979,
"learning_rate": 9.46182384832441e-06,
"loss": 0.5621,
"step": 811
},
{
"epoch": 0.17,
"grad_norm": 0.14943736791610718,
"learning_rate": 9.460248075847199e-06,
"loss": 0.5337,
"step": 812
},
{
"epoch": 0.18,
"grad_norm": 0.1822364181280136,
"learning_rate": 9.4586701314303e-06,
"loss": 0.5071,
"step": 813
},
{
"epoch": 0.18,
"grad_norm": 0.16500526666641235,
"learning_rate": 9.457090015842104e-06,
"loss": 0.483,
"step": 814
},
{
"epoch": 0.18,
"grad_norm": 0.1568198800086975,
"learning_rate": 9.455507729852053e-06,
"loss": 0.496,
"step": 815
},
{
"epoch": 0.18,
"grad_norm": 0.17206601798534393,
"learning_rate": 9.453923274230653e-06,
"loss": 0.5544,
"step": 816
},
{
"epoch": 0.18,
"grad_norm": 0.15982304513454437,
"learning_rate": 9.452336649749458e-06,
"loss": 0.5124,
"step": 817
},
{
"epoch": 0.18,
"grad_norm": 0.19488324224948883,
"learning_rate": 9.450747857181084e-06,
"loss": 0.4981,
"step": 818
},
{
"epoch": 0.18,
"grad_norm": 0.23650221526622772,
"learning_rate": 9.449156897299202e-06,
"loss": 0.5373,
"step": 819
},
{
"epoch": 0.18,
"grad_norm": 0.15237529575824738,
"learning_rate": 9.447563770878535e-06,
"loss": 0.5248,
"step": 820
},
{
"epoch": 0.18,
"grad_norm": 0.15700353682041168,
"learning_rate": 9.44596847869487e-06,
"loss": 0.5289,
"step": 821
},
{
"epoch": 0.18,
"grad_norm": 0.17049898207187653,
"learning_rate": 9.444371021525036e-06,
"loss": 0.5195,
"step": 822
},
{
"epoch": 0.18,
"grad_norm": 0.18980465829372406,
"learning_rate": 9.442771400146926e-06,
"loss": 0.5191,
"step": 823
},
{
"epoch": 0.18,
"grad_norm": 0.14770746231079102,
"learning_rate": 9.441169615339482e-06,
"loss": 0.4799,
"step": 824
},
{
"epoch": 0.18,
"grad_norm": 0.1894197016954422,
"learning_rate": 9.439565667882702e-06,
"loss": 0.5771,
"step": 825
},
{
"epoch": 0.18,
"grad_norm": 0.17405198514461517,
"learning_rate": 9.437959558557635e-06,
"loss": 0.5276,
"step": 826
},
{
"epoch": 0.18,
"grad_norm": 0.2038612961769104,
"learning_rate": 9.436351288146383e-06,
"loss": 0.4888,
"step": 827
},
{
"epoch": 0.18,
"grad_norm": 0.18169601261615753,
"learning_rate": 9.434740857432105e-06,
"loss": 0.5273,
"step": 828
},
{
"epoch": 0.18,
"grad_norm": 0.19223563373088837,
"learning_rate": 9.433128267199006e-06,
"loss": 0.534,
"step": 829
},
{
"epoch": 0.18,
"grad_norm": 0.20077872276306152,
"learning_rate": 9.431513518232343e-06,
"loss": 0.5153,
"step": 830
},
{
"epoch": 0.18,
"grad_norm": 0.1688869744539261,
"learning_rate": 9.429896611318428e-06,
"loss": 0.5408,
"step": 831
},
{
"epoch": 0.18,
"grad_norm": 0.24384887516498566,
"learning_rate": 9.42827754724462e-06,
"loss": 0.5771,
"step": 832
},
{
"epoch": 0.18,
"grad_norm": 0.15766644477844238,
"learning_rate": 9.426656326799333e-06,
"loss": 0.4948,
"step": 833
},
{
"epoch": 0.18,
"grad_norm": 0.1572624146938324,
"learning_rate": 9.425032950772025e-06,
"loss": 0.5612,
"step": 834
},
{
"epoch": 0.18,
"grad_norm": 0.15511459112167358,
"learning_rate": 9.42340741995321e-06,
"loss": 0.544,
"step": 835
},
{
"epoch": 0.18,
"grad_norm": 0.1777951866388321,
"learning_rate": 9.421779735134446e-06,
"loss": 0.5394,
"step": 836
},
{
"epoch": 0.18,
"grad_norm": 0.2677023410797119,
"learning_rate": 9.420149897108341e-06,
"loss": 0.484,
"step": 837
},
{
"epoch": 0.18,
"grad_norm": 0.1472686529159546,
"learning_rate": 9.418517906668556e-06,
"loss": 0.4913,
"step": 838
},
{
"epoch": 0.18,
"grad_norm": 0.15383826196193695,
"learning_rate": 9.416883764609797e-06,
"loss": 0.4718,
"step": 839
},
{
"epoch": 0.18,
"grad_norm": 0.19486670196056366,
"learning_rate": 9.415247471727813e-06,
"loss": 0.527,
"step": 840
},
{
"epoch": 0.18,
"grad_norm": 0.16585233807563782,
"learning_rate": 9.413609028819409e-06,
"loss": 0.5039,
"step": 841
},
{
"epoch": 0.18,
"grad_norm": 0.18775971233844757,
"learning_rate": 9.41196843668243e-06,
"loss": 0.4744,
"step": 842
},
{
"epoch": 0.18,
"grad_norm": 0.16499534249305725,
"learning_rate": 9.410325696115775e-06,
"loss": 0.5376,
"step": 843
},
{
"epoch": 0.18,
"grad_norm": 0.1950598657131195,
"learning_rate": 9.408680807919377e-06,
"loss": 0.5213,
"step": 844
},
{
"epoch": 0.18,
"grad_norm": 0.14264388382434845,
"learning_rate": 9.407033772894229e-06,
"loss": 0.566,
"step": 845
},
{
"epoch": 0.18,
"grad_norm": 0.16956187784671783,
"learning_rate": 9.405384591842358e-06,
"loss": 0.5058,
"step": 846
},
{
"epoch": 0.18,
"grad_norm": 0.13649915158748627,
"learning_rate": 9.403733265566848e-06,
"loss": 0.4948,
"step": 847
},
{
"epoch": 0.18,
"grad_norm": 0.1546815037727356,
"learning_rate": 9.402079794871812e-06,
"loss": 0.5087,
"step": 848
},
{
"epoch": 0.18,
"grad_norm": 0.17630915343761444,
"learning_rate": 9.400424180562421e-06,
"loss": 0.5477,
"step": 849
},
{
"epoch": 0.18,
"grad_norm": 0.19923992455005646,
"learning_rate": 9.398766423444883e-06,
"loss": 0.5332,
"step": 850
},
{
"epoch": 0.18,
"grad_norm": 0.1514226794242859,
"learning_rate": 9.397106524326449e-06,
"loss": 0.5278,
"step": 851
},
{
"epoch": 0.18,
"grad_norm": 0.17602422833442688,
"learning_rate": 9.39544448401542e-06,
"loss": 0.4708,
"step": 852
},
{
"epoch": 0.18,
"grad_norm": 0.17394909262657166,
"learning_rate": 9.393780303321128e-06,
"loss": 0.5128,
"step": 853
},
{
"epoch": 0.18,
"grad_norm": 0.14890971779823303,
"learning_rate": 9.392113983053958e-06,
"loss": 0.4967,
"step": 854
},
{
"epoch": 0.18,
"grad_norm": 0.18306109309196472,
"learning_rate": 9.390445524025336e-06,
"loss": 0.4917,
"step": 855
},
{
"epoch": 0.18,
"grad_norm": 0.16756963729858398,
"learning_rate": 9.38877492704772e-06,
"loss": 0.5143,
"step": 856
},
{
"epoch": 0.18,
"grad_norm": 0.15101511776447296,
"learning_rate": 9.387102192934618e-06,
"loss": 0.5214,
"step": 857
},
{
"epoch": 0.18,
"grad_norm": 0.21072083711624146,
"learning_rate": 9.385427322500575e-06,
"loss": 0.5188,
"step": 858
},
{
"epoch": 0.19,
"grad_norm": 0.3193773627281189,
"learning_rate": 9.38375031656118e-06,
"loss": 0.5248,
"step": 859
},
{
"epoch": 0.19,
"grad_norm": 0.17284849286079407,
"learning_rate": 9.382071175933058e-06,
"loss": 0.5331,
"step": 860
},
{
"epoch": 0.19,
"grad_norm": 0.16421107947826385,
"learning_rate": 9.380389901433875e-06,
"loss": 0.5512,
"step": 861
},
{
"epoch": 0.19,
"grad_norm": 0.19052369892597198,
"learning_rate": 9.378706493882335e-06,
"loss": 0.5485,
"step": 862
},
{
"epoch": 0.19,
"grad_norm": 0.20467452704906464,
"learning_rate": 9.377020954098181e-06,
"loss": 0.5334,
"step": 863
},
{
"epoch": 0.19,
"grad_norm": 0.14375852048397064,
"learning_rate": 9.375333282902198e-06,
"loss": 0.5574,
"step": 864
},
{
"epoch": 0.19,
"grad_norm": 0.16476349532604218,
"learning_rate": 9.3736434811162e-06,
"loss": 0.542,
"step": 865
},
{
"epoch": 0.19,
"grad_norm": 0.18003122508525848,
"learning_rate": 9.37195154956305e-06,
"loss": 0.5179,
"step": 866
},
{
"epoch": 0.19,
"grad_norm": 0.17590996623039246,
"learning_rate": 9.37025748906664e-06,
"loss": 0.5528,
"step": 867
},
{
"epoch": 0.19,
"grad_norm": 0.20183418691158295,
"learning_rate": 9.368561300451902e-06,
"loss": 0.544,
"step": 868
},
{
"epoch": 0.19,
"grad_norm": 0.1688835769891739,
"learning_rate": 9.366862984544802e-06,
"loss": 0.4812,
"step": 869
},
{
"epoch": 0.19,
"grad_norm": 0.21423234045505524,
"learning_rate": 9.365162542172346e-06,
"loss": 0.5428,
"step": 870
},
{
"epoch": 0.19,
"grad_norm": 0.15897509455680847,
"learning_rate": 9.363459974162568e-06,
"loss": 0.5227,
"step": 871
},
{
"epoch": 0.19,
"grad_norm": 0.16136378049850464,
"learning_rate": 9.361755281344547e-06,
"loss": 0.555,
"step": 872
},
{
"epoch": 0.19,
"grad_norm": 0.1796402931213379,
"learning_rate": 9.360048464548386e-06,
"loss": 0.4782,
"step": 873
},
{
"epoch": 0.19,
"grad_norm": 0.531697690486908,
"learning_rate": 9.358339524605233e-06,
"loss": 0.5207,
"step": 874
},
{
"epoch": 0.19,
"grad_norm": 0.15121889114379883,
"learning_rate": 9.356628462347264e-06,
"loss": 0.4837,
"step": 875
},
{
"epoch": 0.19,
"grad_norm": 0.17013150453567505,
"learning_rate": 9.354915278607685e-06,
"loss": 0.4911,
"step": 876
},
{
"epoch": 0.19,
"grad_norm": 0.18947632610797882,
"learning_rate": 9.353199974220744e-06,
"loss": 0.5029,
"step": 877
},
{
"epoch": 0.19,
"grad_norm": 0.13245789706707,
"learning_rate": 9.351482550021713e-06,
"loss": 0.4782,
"step": 878
},
{
"epoch": 0.19,
"grad_norm": 0.21231511235237122,
"learning_rate": 9.349763006846903e-06,
"loss": 0.5535,
"step": 879
},
{
"epoch": 0.19,
"grad_norm": 0.18766769766807556,
"learning_rate": 9.348041345533653e-06,
"loss": 0.5222,
"step": 880
},
{
"epoch": 0.19,
"grad_norm": 0.16258256137371063,
"learning_rate": 9.346317566920335e-06,
"loss": 0.4873,
"step": 881
},
{
"epoch": 0.19,
"grad_norm": 0.14111053943634033,
"learning_rate": 9.34459167184635e-06,
"loss": 0.4795,
"step": 882
},
{
"epoch": 0.19,
"grad_norm": 0.20663069188594818,
"learning_rate": 9.342863661152133e-06,
"loss": 0.5221,
"step": 883
},
{
"epoch": 0.19,
"grad_norm": 0.1376432627439499,
"learning_rate": 9.341133535679145e-06,
"loss": 0.464,
"step": 884
},
{
"epoch": 0.19,
"grad_norm": 0.15190206468105316,
"learning_rate": 9.33940129626988e-06,
"loss": 0.5118,
"step": 885
},
{
"epoch": 0.19,
"grad_norm": 0.16546842455863953,
"learning_rate": 9.337666943767863e-06,
"loss": 0.5256,
"step": 886
},
{
"epoch": 0.19,
"grad_norm": 0.1859419345855713,
"learning_rate": 9.335930479017642e-06,
"loss": 0.562,
"step": 887
},
{
"epoch": 0.19,
"grad_norm": 0.2912534475326538,
"learning_rate": 9.334191902864799e-06,
"loss": 0.5298,
"step": 888
},
{
"epoch": 0.19,
"grad_norm": 0.16982656717300415,
"learning_rate": 9.33245121615594e-06,
"loss": 0.4953,
"step": 889
},
{
"epoch": 0.19,
"grad_norm": 0.21246029436588287,
"learning_rate": 9.330708419738704e-06,
"loss": 0.5222,
"step": 890
},
{
"epoch": 0.19,
"grad_norm": 0.3462158739566803,
"learning_rate": 9.328963514461753e-06,
"loss": 0.5451,
"step": 891
},
{
"epoch": 0.19,
"grad_norm": 0.14150933921337128,
"learning_rate": 9.327216501174775e-06,
"loss": 0.5529,
"step": 892
},
{
"epoch": 0.19,
"grad_norm": 0.15398851037025452,
"learning_rate": 9.32546738072849e-06,
"loss": 0.5258,
"step": 893
},
{
"epoch": 0.19,
"grad_norm": 0.14066843688488007,
"learning_rate": 9.323716153974639e-06,
"loss": 0.5097,
"step": 894
},
{
"epoch": 0.19,
"grad_norm": 0.1923949271440506,
"learning_rate": 9.321962821765991e-06,
"loss": 0.5511,
"step": 895
},
{
"epoch": 0.19,
"grad_norm": 0.2550576627254486,
"learning_rate": 9.320207384956339e-06,
"loss": 0.5541,
"step": 896
},
{
"epoch": 0.19,
"grad_norm": 0.178908571600914,
"learning_rate": 9.318449844400504e-06,
"loss": 0.5135,
"step": 897
},
{
"epoch": 0.19,
"grad_norm": 0.19086679816246033,
"learning_rate": 9.316690200954324e-06,
"loss": 0.5143,
"step": 898
},
{
"epoch": 0.19,
"grad_norm": 0.1290796995162964,
"learning_rate": 9.31492845547467e-06,
"loss": 0.502,
"step": 899
},
{
"epoch": 0.19,
"grad_norm": 0.14759333431720734,
"learning_rate": 9.313164608819434e-06,
"loss": 0.5287,
"step": 900
},
{
"epoch": 0.19,
"grad_norm": 0.158295676112175,
"learning_rate": 9.311398661847526e-06,
"loss": 0.56,
"step": 901
},
{
"epoch": 0.19,
"grad_norm": 0.15422774851322174,
"learning_rate": 9.309630615418884e-06,
"loss": 0.5334,
"step": 902
},
{
"epoch": 0.19,
"grad_norm": 0.16427233815193176,
"learning_rate": 9.307860470394467e-06,
"loss": 0.5364,
"step": 903
},
{
"epoch": 0.19,
"grad_norm": 0.21173103153705597,
"learning_rate": 9.306088227636257e-06,
"loss": 0.5094,
"step": 904
},
{
"epoch": 0.19,
"grad_norm": 0.21965515613555908,
"learning_rate": 9.304313888007254e-06,
"loss": 0.5219,
"step": 905
},
{
"epoch": 0.2,
"grad_norm": 0.15674197673797607,
"learning_rate": 9.302537452371482e-06,
"loss": 0.5188,
"step": 906
},
{
"epoch": 0.2,
"grad_norm": 0.19992782175540924,
"learning_rate": 9.300758921593986e-06,
"loss": 0.499,
"step": 907
},
{
"epoch": 0.2,
"grad_norm": 0.17217914760112762,
"learning_rate": 9.298978296540829e-06,
"loss": 0.5364,
"step": 908
},
{
"epoch": 0.2,
"grad_norm": 0.17133580148220062,
"learning_rate": 9.297195578079096e-06,
"loss": 0.4968,
"step": 909
},
{
"epoch": 0.2,
"grad_norm": 0.23099388182163239,
"learning_rate": 9.295410767076891e-06,
"loss": 0.5252,
"step": 910
},
{
"epoch": 0.2,
"grad_norm": 0.16104039549827576,
"learning_rate": 9.293623864403336e-06,
"loss": 0.4742,
"step": 911
},
{
"epoch": 0.2,
"grad_norm": 0.12712013721466064,
"learning_rate": 9.291834870928573e-06,
"loss": 0.559,
"step": 912
},
{
"epoch": 0.2,
"grad_norm": 0.17714501917362213,
"learning_rate": 9.29004378752376e-06,
"loss": 0.6085,
"step": 913
},
{
"epoch": 0.2,
"grad_norm": 0.16740843653678894,
"learning_rate": 9.288250615061073e-06,
"loss": 0.5035,
"step": 914
},
{
"epoch": 0.2,
"grad_norm": 0.19126859307289124,
"learning_rate": 9.286455354413707e-06,
"loss": 0.5777,
"step": 915
},
{
"epoch": 0.2,
"grad_norm": 0.14088517427444458,
"learning_rate": 9.284658006455871e-06,
"loss": 0.5092,
"step": 916
},
{
"epoch": 0.2,
"grad_norm": 0.14722870290279388,
"learning_rate": 9.282858572062795e-06,
"loss": 0.5206,
"step": 917
},
{
"epoch": 0.2,
"grad_norm": 0.1408064216375351,
"learning_rate": 9.281057052110725e-06,
"loss": 0.5287,
"step": 918
},
{
"epoch": 0.2,
"grad_norm": 0.1396157294511795,
"learning_rate": 9.279253447476914e-06,
"loss": 0.5116,
"step": 919
},
{
"epoch": 0.2,
"grad_norm": 0.14657460153102875,
"learning_rate": 9.27744775903964e-06,
"loss": 0.5108,
"step": 920
},
{
"epoch": 0.2,
"grad_norm": 0.17514435946941376,
"learning_rate": 9.27563998767819e-06,
"loss": 0.5112,
"step": 921
},
{
"epoch": 0.2,
"grad_norm": 0.17996759712696075,
"learning_rate": 9.27383013427287e-06,
"loss": 0.495,
"step": 922
},
{
"epoch": 0.2,
"grad_norm": 0.18228891491889954,
"learning_rate": 9.272018199704993e-06,
"loss": 0.4843,
"step": 923
},
{
"epoch": 0.2,
"grad_norm": 0.18271513283252716,
"learning_rate": 9.270204184856893e-06,
"loss": 0.5625,
"step": 924
},
{
"epoch": 0.2,
"grad_norm": 0.18662293255329132,
"learning_rate": 9.26838809061191e-06,
"loss": 0.5065,
"step": 925
},
{
"epoch": 0.2,
"grad_norm": 0.15625204145908356,
"learning_rate": 9.266569917854403e-06,
"loss": 0.5557,
"step": 926
},
{
"epoch": 0.2,
"grad_norm": 0.16261446475982666,
"learning_rate": 9.264749667469737e-06,
"loss": 0.5583,
"step": 927
},
{
"epoch": 0.2,
"grad_norm": 0.14734329283237457,
"learning_rate": 9.262927340344296e-06,
"loss": 0.567,
"step": 928
},
{
"epoch": 0.2,
"grad_norm": 0.18826404213905334,
"learning_rate": 9.261102937365468e-06,
"loss": 0.5309,
"step": 929
},
{
"epoch": 0.2,
"grad_norm": 0.18732258677482605,
"learning_rate": 9.259276459421655e-06,
"loss": 0.525,
"step": 930
},
{
"epoch": 0.2,
"grad_norm": 0.176020547747612,
"learning_rate": 9.257447907402272e-06,
"loss": 0.5187,
"step": 931
},
{
"epoch": 0.2,
"grad_norm": 0.15038305521011353,
"learning_rate": 9.255617282197739e-06,
"loss": 0.5049,
"step": 932
},
{
"epoch": 0.2,
"grad_norm": 0.15459555387496948,
"learning_rate": 9.253784584699488e-06,
"loss": 0.5021,
"step": 933
},
{
"epoch": 0.2,
"grad_norm": 0.16818863153457642,
"learning_rate": 9.25194981579996e-06,
"loss": 0.5109,
"step": 934
},
{
"epoch": 0.2,
"grad_norm": 0.158711776137352,
"learning_rate": 9.250112976392608e-06,
"loss": 0.5235,
"step": 935
},
{
"epoch": 0.2,
"grad_norm": 0.13350459933280945,
"learning_rate": 9.248274067371886e-06,
"loss": 0.5624,
"step": 936
},
{
"epoch": 0.2,
"grad_norm": 0.16148029267787933,
"learning_rate": 9.24643308963326e-06,
"loss": 0.5562,
"step": 937
},
{
"epoch": 0.2,
"grad_norm": 0.17886267602443695,
"learning_rate": 9.244590044073205e-06,
"loss": 0.5252,
"step": 938
},
{
"epoch": 0.2,
"grad_norm": 0.18493221700191498,
"learning_rate": 9.2427449315892e-06,
"loss": 0.5195,
"step": 939
},
{
"epoch": 0.2,
"grad_norm": 0.1529918760061264,
"learning_rate": 9.240897753079734e-06,
"loss": 0.517,
"step": 940
},
{
"epoch": 0.2,
"grad_norm": 0.18862253427505493,
"learning_rate": 9.239048509444296e-06,
"loss": 0.5214,
"step": 941
},
{
"epoch": 0.2,
"grad_norm": 0.1629784107208252,
"learning_rate": 9.237197201583386e-06,
"loss": 0.5421,
"step": 942
},
{
"epoch": 0.2,
"grad_norm": 0.2280578315258026,
"learning_rate": 9.235343830398506e-06,
"loss": 0.5033,
"step": 943
},
{
"epoch": 0.2,
"grad_norm": 0.15682753920555115,
"learning_rate": 9.233488396792167e-06,
"loss": 0.562,
"step": 944
},
{
"epoch": 0.2,
"grad_norm": 0.16542381048202515,
"learning_rate": 9.231630901667879e-06,
"loss": 0.5448,
"step": 945
},
{
"epoch": 0.2,
"grad_norm": 0.1738227754831314,
"learning_rate": 9.22977134593016e-06,
"loss": 0.5662,
"step": 946
},
{
"epoch": 0.2,
"grad_norm": 0.13837432861328125,
"learning_rate": 9.227909730484527e-06,
"loss": 0.5259,
"step": 947
},
{
"epoch": 0.2,
"grad_norm": 0.1606243997812271,
"learning_rate": 9.226046056237508e-06,
"loss": 0.5666,
"step": 948
},
{
"epoch": 0.2,
"grad_norm": 0.14131122827529907,
"learning_rate": 9.224180324096623e-06,
"loss": 0.5486,
"step": 949
},
{
"epoch": 0.2,
"grad_norm": 0.13392627239227295,
"learning_rate": 9.222312534970403e-06,
"loss": 0.4792,
"step": 950
},
{
"epoch": 0.2,
"grad_norm": 0.1538127064704895,
"learning_rate": 9.220442689768376e-06,
"loss": 0.484,
"step": 951
},
{
"epoch": 0.21,
"grad_norm": 0.13414621353149414,
"learning_rate": 9.218570789401071e-06,
"loss": 0.5123,
"step": 952
},
{
"epoch": 0.21,
"grad_norm": 0.17911511659622192,
"learning_rate": 9.21669683478002e-06,
"loss": 0.5549,
"step": 953
},
{
"epoch": 0.21,
"grad_norm": 0.19138379395008087,
"learning_rate": 9.214820826817754e-06,
"loss": 0.4892,
"step": 954
},
{
"epoch": 0.21,
"grad_norm": 0.20988555252552032,
"learning_rate": 9.212942766427806e-06,
"loss": 0.498,
"step": 955
},
{
"epoch": 0.21,
"grad_norm": 0.13097749650478363,
"learning_rate": 9.211062654524705e-06,
"loss": 0.4603,
"step": 956
},
{
"epoch": 0.21,
"grad_norm": 0.1466490477323532,
"learning_rate": 9.20918049202398e-06,
"loss": 0.4924,
"step": 957
},
{
"epoch": 0.21,
"grad_norm": 0.23887225985527039,
"learning_rate": 9.207296279842162e-06,
"loss": 0.5725,
"step": 958
},
{
"epoch": 0.21,
"grad_norm": 0.13960181176662445,
"learning_rate": 9.205410018896775e-06,
"loss": 0.5444,
"step": 959
},
{
"epoch": 0.21,
"grad_norm": 0.21269811689853668,
"learning_rate": 9.203521710106344e-06,
"loss": 0.5672,
"step": 960
},
{
"epoch": 0.21,
"grad_norm": 0.1967892199754715,
"learning_rate": 9.201631354390391e-06,
"loss": 0.5674,
"step": 961
},
{
"epoch": 0.21,
"grad_norm": 0.20892930030822754,
"learning_rate": 9.199738952669431e-06,
"loss": 0.4915,
"step": 962
},
{
"epoch": 0.21,
"grad_norm": 0.15803126990795135,
"learning_rate": 9.197844505864982e-06,
"loss": 0.4839,
"step": 963
},
{
"epoch": 0.21,
"grad_norm": 0.17779715359210968,
"learning_rate": 9.195948014899551e-06,
"loss": 0.5204,
"step": 964
},
{
"epoch": 0.21,
"grad_norm": 0.1472802758216858,
"learning_rate": 9.194049480696647e-06,
"loss": 0.5691,
"step": 965
},
{
"epoch": 0.21,
"grad_norm": 0.19076858460903168,
"learning_rate": 9.192148904180769e-06,
"loss": 0.555,
"step": 966
},
{
"epoch": 0.21,
"grad_norm": 0.15820792317390442,
"learning_rate": 9.19024628627741e-06,
"loss": 0.5462,
"step": 967
},
{
"epoch": 0.21,
"grad_norm": 0.1319994479417801,
"learning_rate": 9.188341627913061e-06,
"loss": 0.5487,
"step": 968
},
{
"epoch": 0.21,
"grad_norm": 0.24205906689167023,
"learning_rate": 9.186434930015205e-06,
"loss": 0.518,
"step": 969
},
{
"epoch": 0.21,
"grad_norm": 0.15955299139022827,
"learning_rate": 9.184526193512318e-06,
"loss": 0.5596,
"step": 970
},
{
"epoch": 0.21,
"grad_norm": 0.16520148515701294,
"learning_rate": 9.182615419333867e-06,
"loss": 0.5647,
"step": 971
},
{
"epoch": 0.21,
"grad_norm": 0.2001345306634903,
"learning_rate": 9.180702608410314e-06,
"loss": 0.544,
"step": 972
},
{
"epoch": 0.21,
"grad_norm": 0.17887279391288757,
"learning_rate": 9.178787761673111e-06,
"loss": 0.5225,
"step": 973
},
{
"epoch": 0.21,
"grad_norm": 0.15997150540351868,
"learning_rate": 9.176870880054704e-06,
"loss": 0.5674,
"step": 974
},
{
"epoch": 0.21,
"grad_norm": 0.14125515520572662,
"learning_rate": 9.174951964488528e-06,
"loss": 0.5542,
"step": 975
},
{
"epoch": 0.21,
"grad_norm": 0.1298058182001114,
"learning_rate": 9.173031015909005e-06,
"loss": 0.5015,
"step": 976
},
{
"epoch": 0.21,
"grad_norm": 0.17486491799354553,
"learning_rate": 9.17110803525155e-06,
"loss": 0.569,
"step": 977
},
{
"epoch": 0.21,
"grad_norm": 0.18652723729610443,
"learning_rate": 9.169183023452574e-06,
"loss": 0.5062,
"step": 978
},
{
"epoch": 0.21,
"grad_norm": 0.1338779628276825,
"learning_rate": 9.167255981449466e-06,
"loss": 0.5122,
"step": 979
},
{
"epoch": 0.21,
"grad_norm": 0.13061174750328064,
"learning_rate": 9.165326910180608e-06,
"loss": 0.4903,
"step": 980
},
{
"epoch": 0.21,
"grad_norm": 0.13457538187503815,
"learning_rate": 9.163395810585374e-06,
"loss": 0.5316,
"step": 981
},
{
"epoch": 0.21,
"grad_norm": 0.14567075669765472,
"learning_rate": 9.161462683604118e-06,
"loss": 0.5241,
"step": 982
},
{
"epoch": 0.21,
"grad_norm": 0.2161962240934372,
"learning_rate": 9.159527530178191e-06,
"loss": 0.513,
"step": 983
},
{
"epoch": 0.21,
"grad_norm": 0.14327682554721832,
"learning_rate": 9.157590351249923e-06,
"loss": 0.5493,
"step": 984
},
{
"epoch": 0.21,
"grad_norm": 0.14309169352054596,
"learning_rate": 9.155651147762631e-06,
"loss": 0.514,
"step": 985
},
{
"epoch": 0.21,
"grad_norm": 0.16566166281700134,
"learning_rate": 9.153709920660624e-06,
"loss": 0.4916,
"step": 986
},
{
"epoch": 0.21,
"grad_norm": 0.18244121968746185,
"learning_rate": 9.151766670889186e-06,
"loss": 0.5397,
"step": 987
},
{
"epoch": 0.21,
"grad_norm": 0.1684887707233429,
"learning_rate": 9.149821399394597e-06,
"loss": 0.5094,
"step": 988
},
{
"epoch": 0.21,
"grad_norm": 0.15885643661022186,
"learning_rate": 9.147874107124114e-06,
"loss": 0.5258,
"step": 989
},
{
"epoch": 0.21,
"grad_norm": 0.2527085542678833,
"learning_rate": 9.145924795025984e-06,
"loss": 0.5456,
"step": 990
},
{
"epoch": 0.21,
"grad_norm": 0.20791400969028473,
"learning_rate": 9.14397346404943e-06,
"loss": 0.5137,
"step": 991
},
{
"epoch": 0.21,
"grad_norm": 0.18550600111484528,
"learning_rate": 9.142020115144662e-06,
"loss": 0.4834,
"step": 992
},
{
"epoch": 0.21,
"grad_norm": 0.15677522122859955,
"learning_rate": 9.140064749262876e-06,
"loss": 0.5201,
"step": 993
},
{
"epoch": 0.21,
"grad_norm": 0.15685126185417175,
"learning_rate": 9.138107367356247e-06,
"loss": 0.4838,
"step": 994
},
{
"epoch": 0.21,
"grad_norm": 0.13539238274097443,
"learning_rate": 9.136147970377926e-06,
"loss": 0.5323,
"step": 995
},
{
"epoch": 0.21,
"grad_norm": 0.18492737412452698,
"learning_rate": 9.134186559282058e-06,
"loss": 0.5457,
"step": 996
},
{
"epoch": 0.21,
"grad_norm": 0.14817145466804504,
"learning_rate": 9.132223135023759e-06,
"loss": 0.5151,
"step": 997
},
{
"epoch": 0.21,
"grad_norm": 0.17167848348617554,
"learning_rate": 9.130257698559129e-06,
"loss": 0.5397,
"step": 998
},
{
"epoch": 0.22,
"grad_norm": 0.15762774646282196,
"learning_rate": 9.128290250845244e-06,
"loss": 0.527,
"step": 999
},
{
"epoch": 0.22,
"grad_norm": 0.20650818943977356,
"learning_rate": 9.126320792840165e-06,
"loss": 0.5657,
"step": 1000
},
{
"epoch": 0.22,
"grad_norm": 0.192567840218544,
"learning_rate": 9.124349325502928e-06,
"loss": 0.5291,
"step": 1001
},
{
"epoch": 0.22,
"grad_norm": 0.13800346851348877,
"learning_rate": 9.12237584979355e-06,
"loss": 0.526,
"step": 1002
},
{
"epoch": 0.22,
"grad_norm": 0.12781374156475067,
"learning_rate": 9.120400366673024e-06,
"loss": 0.5068,
"step": 1003
},
{
"epoch": 0.22,
"grad_norm": 0.1455235779285431,
"learning_rate": 9.11842287710332e-06,
"loss": 0.4949,
"step": 1004
},
{
"epoch": 0.22,
"grad_norm": 0.16621056199073792,
"learning_rate": 9.116443382047391e-06,
"loss": 0.5166,
"step": 1005
},
{
"epoch": 0.22,
"grad_norm": 0.19221191108226776,
"learning_rate": 9.114461882469154e-06,
"loss": 0.5088,
"step": 1006
},
{
"epoch": 0.22,
"grad_norm": 0.15902382135391235,
"learning_rate": 9.112478379333517e-06,
"loss": 0.5388,
"step": 1007
},
{
"epoch": 0.22,
"grad_norm": 0.13084392249584198,
"learning_rate": 9.110492873606351e-06,
"loss": 0.4672,
"step": 1008
},
{
"epoch": 0.22,
"grad_norm": 0.15393121540546417,
"learning_rate": 9.108505366254512e-06,
"loss": 0.5063,
"step": 1009
},
{
"epoch": 0.22,
"grad_norm": 0.16303934156894684,
"learning_rate": 9.106515858245825e-06,
"loss": 0.545,
"step": 1010
},
{
"epoch": 0.22,
"grad_norm": 0.16543173789978027,
"learning_rate": 9.10452435054909e-06,
"loss": 0.5345,
"step": 1011
},
{
"epoch": 0.22,
"grad_norm": 0.16311848163604736,
"learning_rate": 9.102530844134084e-06,
"loss": 0.4611,
"step": 1012
},
{
"epoch": 0.22,
"grad_norm": 0.16494883596897125,
"learning_rate": 9.10053533997155e-06,
"loss": 0.4955,
"step": 1013
},
{
"epoch": 0.22,
"grad_norm": 0.14451864361763,
"learning_rate": 9.098537839033213e-06,
"loss": 0.4997,
"step": 1014
},
{
"epoch": 0.22,
"grad_norm": 0.20046649873256683,
"learning_rate": 9.096538342291763e-06,
"loss": 0.5718,
"step": 1015
},
{
"epoch": 0.22,
"grad_norm": 0.1361169070005417,
"learning_rate": 9.094536850720867e-06,
"loss": 0.4561,
"step": 1016
},
{
"epoch": 0.22,
"grad_norm": 0.1675615757703781,
"learning_rate": 9.09253336529516e-06,
"loss": 0.5372,
"step": 1017
},
{
"epoch": 0.22,
"grad_norm": 0.22339864075183868,
"learning_rate": 9.090527886990249e-06,
"loss": 0.5611,
"step": 1018
},
{
"epoch": 0.22,
"grad_norm": 0.17522381246089935,
"learning_rate": 9.088520416782712e-06,
"loss": 0.5352,
"step": 1019
},
{
"epoch": 0.22,
"grad_norm": 0.13996882736682892,
"learning_rate": 9.086510955650095e-06,
"loss": 0.4947,
"step": 1020
},
{
"epoch": 0.22,
"grad_norm": 0.15913517773151398,
"learning_rate": 9.084499504570918e-06,
"loss": 0.4947,
"step": 1021
},
{
"epoch": 0.22,
"grad_norm": 0.17235067486763,
"learning_rate": 9.082486064524663e-06,
"loss": 0.53,
"step": 1022
},
{
"epoch": 0.22,
"grad_norm": 0.2162034660577774,
"learning_rate": 9.080470636491787e-06,
"loss": 0.4904,
"step": 1023
},
{
"epoch": 0.22,
"grad_norm": 0.21353678405284882,
"learning_rate": 9.078453221453714e-06,
"loss": 0.5088,
"step": 1024
},
{
"epoch": 0.22,
"grad_norm": 0.1277047097682953,
"learning_rate": 9.076433820392831e-06,
"loss": 0.5207,
"step": 1025
},
{
"epoch": 0.22,
"grad_norm": 0.15845198929309845,
"learning_rate": 9.074412434292496e-06,
"loss": 0.5951,
"step": 1026
},
{
"epoch": 0.22,
"grad_norm": 0.17977949976921082,
"learning_rate": 9.072389064137035e-06,
"loss": 0.5098,
"step": 1027
},
{
"epoch": 0.22,
"grad_norm": 0.15521718561649323,
"learning_rate": 9.070363710911736e-06,
"loss": 0.5513,
"step": 1028
},
{
"epoch": 0.22,
"grad_norm": 0.14528630673885345,
"learning_rate": 9.068336375602853e-06,
"loss": 0.4895,
"step": 1029
},
{
"epoch": 0.22,
"grad_norm": 0.16791880130767822,
"learning_rate": 9.066307059197612e-06,
"loss": 0.528,
"step": 1030
},
{
"epoch": 0.22,
"grad_norm": 0.1570877581834793,
"learning_rate": 9.064275762684194e-06,
"loss": 0.4957,
"step": 1031
},
{
"epoch": 0.22,
"grad_norm": 0.130596324801445,
"learning_rate": 9.062242487051752e-06,
"loss": 0.5338,
"step": 1032
},
{
"epoch": 0.22,
"grad_norm": 0.14908380806446075,
"learning_rate": 9.060207233290396e-06,
"loss": 0.5295,
"step": 1033
},
{
"epoch": 0.22,
"grad_norm": 0.18400724232196808,
"learning_rate": 9.058170002391205e-06,
"loss": 0.5265,
"step": 1034
},
{
"epoch": 0.22,
"grad_norm": 0.1491273045539856,
"learning_rate": 9.05613079534622e-06,
"loss": 0.4974,
"step": 1035
},
{
"epoch": 0.22,
"grad_norm": 0.1835760623216629,
"learning_rate": 9.05408961314844e-06,
"loss": 0.5317,
"step": 1036
},
{
"epoch": 0.22,
"grad_norm": 0.14263573288917542,
"learning_rate": 9.052046456791829e-06,
"loss": 0.4928,
"step": 1037
},
{
"epoch": 0.22,
"grad_norm": 0.3876129686832428,
"learning_rate": 9.050001327271314e-06,
"loss": 0.5149,
"step": 1038
},
{
"epoch": 0.22,
"grad_norm": 0.16249504685401917,
"learning_rate": 9.04795422558278e-06,
"loss": 0.5251,
"step": 1039
},
{
"epoch": 0.22,
"grad_norm": 0.16931766271591187,
"learning_rate": 9.045905152723074e-06,
"loss": 0.5532,
"step": 1040
},
{
"epoch": 0.22,
"grad_norm": 0.1582767814397812,
"learning_rate": 9.043854109689998e-06,
"loss": 0.4976,
"step": 1041
},
{
"epoch": 0.22,
"grad_norm": 0.15859778225421906,
"learning_rate": 9.041801097482323e-06,
"loss": 0.4995,
"step": 1042
},
{
"epoch": 0.22,
"grad_norm": 0.18055035173892975,
"learning_rate": 9.03974611709977e-06,
"loss": 0.493,
"step": 1043
},
{
"epoch": 0.22,
"grad_norm": 0.16349811851978302,
"learning_rate": 9.037689169543024e-06,
"loss": 0.5102,
"step": 1044
},
{
"epoch": 0.23,
"grad_norm": 0.19477395713329315,
"learning_rate": 9.035630255813724e-06,
"loss": 0.5361,
"step": 1045
},
{
"epoch": 0.23,
"grad_norm": 0.2538851499557495,
"learning_rate": 9.033569376914467e-06,
"loss": 0.5118,
"step": 1046
},
{
"epoch": 0.23,
"grad_norm": 0.16743601858615875,
"learning_rate": 9.031506533848811e-06,
"loss": 0.5127,
"step": 1047
},
{
"epoch": 0.23,
"grad_norm": 0.1517488956451416,
"learning_rate": 9.029441727621267e-06,
"loss": 0.4791,
"step": 1048
},
{
"epoch": 0.23,
"grad_norm": 0.17050126194953918,
"learning_rate": 9.0273749592373e-06,
"loss": 0.5652,
"step": 1049
},
{
"epoch": 0.23,
"grad_norm": 0.20682963728904724,
"learning_rate": 9.025306229703334e-06,
"loss": 0.5183,
"step": 1050
},
{
"epoch": 0.23,
"grad_norm": 0.16146351397037506,
"learning_rate": 9.02323554002675e-06,
"loss": 0.5112,
"step": 1051
},
{
"epoch": 0.23,
"grad_norm": 0.23130019009113312,
"learning_rate": 9.021162891215879e-06,
"loss": 0.5573,
"step": 1052
},
{
"epoch": 0.23,
"grad_norm": 0.15757335722446442,
"learning_rate": 9.019088284280004e-06,
"loss": 0.5232,
"step": 1053
},
{
"epoch": 0.23,
"grad_norm": 0.14029166102409363,
"learning_rate": 9.017011720229368e-06,
"loss": 0.5329,
"step": 1054
},
{
"epoch": 0.23,
"grad_norm": 0.14857496321201324,
"learning_rate": 9.014933200075165e-06,
"loss": 0.514,
"step": 1055
},
{
"epoch": 0.23,
"grad_norm": 0.17802828550338745,
"learning_rate": 9.012852724829539e-06,
"loss": 0.5324,
"step": 1056
},
{
"epoch": 0.23,
"grad_norm": 0.18392032384872437,
"learning_rate": 9.010770295505587e-06,
"loss": 0.603,
"step": 1057
},
{
"epoch": 0.23,
"grad_norm": 0.13357198238372803,
"learning_rate": 9.008685913117361e-06,
"loss": 0.4848,
"step": 1058
},
{
"epoch": 0.23,
"grad_norm": 0.2151726484298706,
"learning_rate": 9.006599578679859e-06,
"loss": 0.4963,
"step": 1059
},
{
"epoch": 0.23,
"grad_norm": 0.1715989112854004,
"learning_rate": 9.00451129320903e-06,
"loss": 0.5639,
"step": 1060
},
{
"epoch": 0.23,
"grad_norm": 0.19878040254116058,
"learning_rate": 9.002421057721781e-06,
"loss": 0.5452,
"step": 1061
},
{
"epoch": 0.23,
"grad_norm": 0.16640903055667877,
"learning_rate": 9.000328873235955e-06,
"loss": 0.5471,
"step": 1062
},
{
"epoch": 0.23,
"grad_norm": 0.15267455577850342,
"learning_rate": 8.998234740770358e-06,
"loss": 0.545,
"step": 1063
},
{
"epoch": 0.23,
"grad_norm": 0.1756962686777115,
"learning_rate": 8.996138661344734e-06,
"loss": 0.5793,
"step": 1064
},
{
"epoch": 0.23,
"grad_norm": 0.1579316258430481,
"learning_rate": 8.994040635979779e-06,
"loss": 0.466,
"step": 1065
},
{
"epoch": 0.23,
"grad_norm": 0.14408744871616364,
"learning_rate": 8.99194066569714e-06,
"loss": 0.5637,
"step": 1066
},
{
"epoch": 0.23,
"grad_norm": 0.20260116457939148,
"learning_rate": 8.989838751519404e-06,
"loss": 0.5361,
"step": 1067
},
{
"epoch": 0.23,
"grad_norm": 0.17308081686496735,
"learning_rate": 8.987734894470111e-06,
"loss": 0.5083,
"step": 1068
},
{
"epoch": 0.23,
"grad_norm": 0.21290896832942963,
"learning_rate": 8.985629095573743e-06,
"loss": 0.5312,
"step": 1069
},
{
"epoch": 0.23,
"grad_norm": 0.15569837391376495,
"learning_rate": 8.983521355855731e-06,
"loss": 0.5513,
"step": 1070
},
{
"epoch": 0.23,
"grad_norm": 0.169041246175766,
"learning_rate": 8.98141167634245e-06,
"loss": 0.5262,
"step": 1071
},
{
"epoch": 0.23,
"grad_norm": 0.15449997782707214,
"learning_rate": 8.979300058061214e-06,
"loss": 0.5301,
"step": 1072
},
{
"epoch": 0.23,
"grad_norm": 0.15848426520824432,
"learning_rate": 8.977186502040288e-06,
"loss": 0.556,
"step": 1073
},
{
"epoch": 0.23,
"grad_norm": 0.1425653100013733,
"learning_rate": 8.97507100930888e-06,
"loss": 0.489,
"step": 1074
},
{
"epoch": 0.23,
"grad_norm": 0.1488298773765564,
"learning_rate": 8.97295358089714e-06,
"loss": 0.5091,
"step": 1075
},
{
"epoch": 0.23,
"grad_norm": 0.2116803079843521,
"learning_rate": 8.97083421783616e-06,
"loss": 0.5654,
"step": 1076
},
{
"epoch": 0.23,
"grad_norm": 0.17678038775920868,
"learning_rate": 8.96871292115797e-06,
"loss": 0.5485,
"step": 1077
},
{
"epoch": 0.23,
"grad_norm": 0.2219185084104538,
"learning_rate": 8.96658969189555e-06,
"loss": 0.5414,
"step": 1078
},
{
"epoch": 0.23,
"grad_norm": 0.18654341995716095,
"learning_rate": 8.964464531082817e-06,
"loss": 0.4603,
"step": 1079
},
{
"epoch": 0.23,
"grad_norm": 0.29177331924438477,
"learning_rate": 8.962337439754627e-06,
"loss": 0.5267,
"step": 1080
},
{
"epoch": 0.23,
"grad_norm": 0.15607115626335144,
"learning_rate": 8.960208418946778e-06,
"loss": 0.5295,
"step": 1081
},
{
"epoch": 0.23,
"grad_norm": 0.161067396402359,
"learning_rate": 8.958077469696007e-06,
"loss": 0.5795,
"step": 1082
},
{
"epoch": 0.23,
"grad_norm": 0.1314525008201599,
"learning_rate": 8.955944593039991e-06,
"loss": 0.5274,
"step": 1083
},
{
"epoch": 0.23,
"grad_norm": 0.1945776492357254,
"learning_rate": 8.953809790017342e-06,
"loss": 0.4744,
"step": 1084
},
{
"epoch": 0.23,
"grad_norm": 0.1876978725194931,
"learning_rate": 8.951673061667616e-06,
"loss": 0.5036,
"step": 1085
},
{
"epoch": 0.23,
"grad_norm": 0.1536783128976822,
"learning_rate": 8.949534409031305e-06,
"loss": 0.5387,
"step": 1086
},
{
"epoch": 0.23,
"grad_norm": 0.15228869020938873,
"learning_rate": 8.94739383314983e-06,
"loss": 0.4566,
"step": 1087
},
{
"epoch": 0.23,
"grad_norm": 0.17565909028053284,
"learning_rate": 8.94525133506556e-06,
"loss": 0.4965,
"step": 1088
},
{
"epoch": 0.23,
"grad_norm": 0.17287708818912506,
"learning_rate": 8.943106915821793e-06,
"loss": 0.505,
"step": 1089
},
{
"epoch": 0.23,
"grad_norm": 0.13172249495983124,
"learning_rate": 8.940960576462763e-06,
"loss": 0.522,
"step": 1090
},
{
"epoch": 0.24,
"grad_norm": 0.14747697114944458,
"learning_rate": 8.938812318033646e-06,
"loss": 0.5058,
"step": 1091
},
{
"epoch": 0.24,
"grad_norm": 0.16435351967811584,
"learning_rate": 8.93666214158054e-06,
"loss": 0.5572,
"step": 1092
},
{
"epoch": 0.24,
"grad_norm": 0.12998394668102264,
"learning_rate": 8.93451004815049e-06,
"loss": 0.4825,
"step": 1093
},
{
"epoch": 0.24,
"grad_norm": 0.2101740837097168,
"learning_rate": 8.932356038791465e-06,
"loss": 0.5399,
"step": 1094
},
{
"epoch": 0.24,
"grad_norm": 0.14743265509605408,
"learning_rate": 8.930200114552371e-06,
"loss": 0.4891,
"step": 1095
},
{
"epoch": 0.24,
"grad_norm": 0.19330647587776184,
"learning_rate": 8.928042276483048e-06,
"loss": 0.5756,
"step": 1096
},
{
"epoch": 0.24,
"grad_norm": 0.14885154366493225,
"learning_rate": 8.925882525634262e-06,
"loss": 0.4704,
"step": 1097
},
{
"epoch": 0.24,
"grad_norm": 0.17634066939353943,
"learning_rate": 8.923720863057718e-06,
"loss": 0.4969,
"step": 1098
},
{
"epoch": 0.24,
"grad_norm": 0.16363896429538727,
"learning_rate": 8.921557289806045e-06,
"loss": 0.5074,
"step": 1099
},
{
"epoch": 0.24,
"grad_norm": 0.20823244750499725,
"learning_rate": 8.919391806932807e-06,
"loss": 0.5217,
"step": 1100
},
{
"epoch": 0.24,
"grad_norm": 0.16124127805233002,
"learning_rate": 8.917224415492497e-06,
"loss": 0.4827,
"step": 1101
},
{
"epoch": 0.24,
"grad_norm": 0.16462095081806183,
"learning_rate": 8.915055116540538e-06,
"loss": 0.5878,
"step": 1102
},
{
"epoch": 0.24,
"grad_norm": 0.1553676277399063,
"learning_rate": 8.912883911133276e-06,
"loss": 0.4883,
"step": 1103
},
{
"epoch": 0.24,
"grad_norm": 0.17461282014846802,
"learning_rate": 8.910710800327996e-06,
"loss": 0.4893,
"step": 1104
},
{
"epoch": 0.24,
"grad_norm": 0.179164856672287,
"learning_rate": 8.908535785182902e-06,
"loss": 0.4993,
"step": 1105
},
{
"epoch": 0.24,
"grad_norm": 0.16661059856414795,
"learning_rate": 8.906358866757128e-06,
"loss": 0.4797,
"step": 1106
},
{
"epoch": 0.24,
"grad_norm": 0.15980976819992065,
"learning_rate": 8.904180046110736e-06,
"loss": 0.5167,
"step": 1107
},
{
"epoch": 0.24,
"grad_norm": 0.15015141665935516,
"learning_rate": 8.901999324304713e-06,
"loss": 0.4971,
"step": 1108
},
{
"epoch": 0.24,
"grad_norm": 0.15872696042060852,
"learning_rate": 8.899816702400973e-06,
"loss": 0.5469,
"step": 1109
},
{
"epoch": 0.24,
"grad_norm": 0.13243776559829712,
"learning_rate": 8.897632181462354e-06,
"loss": 0.5135,
"step": 1110
},
{
"epoch": 0.24,
"grad_norm": 0.1544090360403061,
"learning_rate": 8.895445762552618e-06,
"loss": 0.4792,
"step": 1111
},
{
"epoch": 0.24,
"grad_norm": 0.15280136466026306,
"learning_rate": 8.893257446736455e-06,
"loss": 0.4888,
"step": 1112
},
{
"epoch": 0.24,
"grad_norm": 0.14897377789020538,
"learning_rate": 8.891067235079473e-06,
"loss": 0.4846,
"step": 1113
},
{
"epoch": 0.24,
"grad_norm": 0.21805572509765625,
"learning_rate": 8.888875128648208e-06,
"loss": 0.5184,
"step": 1114
},
{
"epoch": 0.24,
"grad_norm": 0.15725421905517578,
"learning_rate": 8.886681128510118e-06,
"loss": 0.5857,
"step": 1115
},
{
"epoch": 0.24,
"grad_norm": 0.1463284194469452,
"learning_rate": 8.884485235733579e-06,
"loss": 0.4969,
"step": 1116
},
{
"epoch": 0.24,
"grad_norm": 0.1490708589553833,
"learning_rate": 8.882287451387894e-06,
"loss": 0.5814,
"step": 1117
},
{
"epoch": 0.24,
"grad_norm": 0.20178869366645813,
"learning_rate": 8.880087776543287e-06,
"loss": 0.5091,
"step": 1118
},
{
"epoch": 0.24,
"grad_norm": 0.1965067982673645,
"learning_rate": 8.877886212270897e-06,
"loss": 0.4933,
"step": 1119
},
{
"epoch": 0.24,
"grad_norm": 0.16523069143295288,
"learning_rate": 8.875682759642786e-06,
"loss": 0.5445,
"step": 1120
},
{
"epoch": 0.24,
"grad_norm": 0.1690714955329895,
"learning_rate": 8.873477419731938e-06,
"loss": 0.5567,
"step": 1121
},
{
"epoch": 0.24,
"grad_norm": 0.18909381330013275,
"learning_rate": 8.871270193612254e-06,
"loss": 0.5133,
"step": 1122
},
{
"epoch": 0.24,
"grad_norm": 0.1338469237089157,
"learning_rate": 8.869061082358555e-06,
"loss": 0.4958,
"step": 1123
},
{
"epoch": 0.24,
"grad_norm": 0.1580471694469452,
"learning_rate": 8.866850087046574e-06,
"loss": 0.5595,
"step": 1124
},
{
"epoch": 0.24,
"grad_norm": 0.1788654625415802,
"learning_rate": 8.864637208752972e-06,
"loss": 0.5481,
"step": 1125
},
{
"epoch": 0.24,
"grad_norm": 0.20803380012512207,
"learning_rate": 8.862422448555317e-06,
"loss": 0.5478,
"step": 1126
},
{
"epoch": 0.24,
"grad_norm": 0.19867488741874695,
"learning_rate": 8.860205807532097e-06,
"loss": 0.4927,
"step": 1127
},
{
"epoch": 0.24,
"grad_norm": 0.13807149231433868,
"learning_rate": 8.857987286762718e-06,
"loss": 0.5021,
"step": 1128
},
{
"epoch": 0.24,
"grad_norm": 0.15068547427654266,
"learning_rate": 8.8557668873275e-06,
"loss": 0.4993,
"step": 1129
},
{
"epoch": 0.24,
"grad_norm": 0.14488062262535095,
"learning_rate": 8.853544610307675e-06,
"loss": 0.4815,
"step": 1130
},
{
"epoch": 0.24,
"grad_norm": 0.15107618272304535,
"learning_rate": 8.851320456785394e-06,
"loss": 0.5086,
"step": 1131
},
{
"epoch": 0.24,
"grad_norm": 0.16421128809452057,
"learning_rate": 8.84909442784372e-06,
"loss": 0.4844,
"step": 1132
},
{
"epoch": 0.24,
"grad_norm": 0.17027032375335693,
"learning_rate": 8.846866524566624e-06,
"loss": 0.4721,
"step": 1133
},
{
"epoch": 0.24,
"grad_norm": 0.2614370584487915,
"learning_rate": 8.844636748038999e-06,
"loss": 0.5745,
"step": 1134
},
{
"epoch": 0.24,
"grad_norm": 0.15496228635311127,
"learning_rate": 8.842405099346645e-06,
"loss": 0.5499,
"step": 1135
},
{
"epoch": 0.24,
"grad_norm": 0.1893419474363327,
"learning_rate": 8.840171579576273e-06,
"loss": 0.4691,
"step": 1136
},
{
"epoch": 0.24,
"grad_norm": 0.13554450869560242,
"learning_rate": 8.837936189815507e-06,
"loss": 0.54,
"step": 1137
},
{
"epoch": 0.25,
"grad_norm": 0.12900054454803467,
"learning_rate": 8.83569893115288e-06,
"loss": 0.479,
"step": 1138
},
{
"epoch": 0.25,
"grad_norm": 0.1491711586713791,
"learning_rate": 8.83345980467784e-06,
"loss": 0.5322,
"step": 1139
},
{
"epoch": 0.25,
"grad_norm": 0.16243106126785278,
"learning_rate": 8.831218811480735e-06,
"loss": 0.4434,
"step": 1140
},
{
"epoch": 0.25,
"grad_norm": 0.16812367737293243,
"learning_rate": 8.828975952652833e-06,
"loss": 0.5024,
"step": 1141
},
{
"epoch": 0.25,
"grad_norm": 0.1857740879058838,
"learning_rate": 8.8267312292863e-06,
"loss": 0.5696,
"step": 1142
},
{
"epoch": 0.25,
"grad_norm": 0.13055641949176788,
"learning_rate": 8.824484642474217e-06,
"loss": 0.4787,
"step": 1143
},
{
"epoch": 0.25,
"grad_norm": 0.17672252655029297,
"learning_rate": 8.822236193310574e-06,
"loss": 0.5788,
"step": 1144
},
{
"epoch": 0.25,
"grad_norm": 0.15305279195308685,
"learning_rate": 8.81998588289026e-06,
"loss": 0.503,
"step": 1145
},
{
"epoch": 0.25,
"grad_norm": 0.15624657273292542,
"learning_rate": 8.817733712309078e-06,
"loss": 0.5346,
"step": 1146
},
{
"epoch": 0.25,
"grad_norm": 0.14786425232887268,
"learning_rate": 8.815479682663729e-06,
"loss": 0.5083,
"step": 1147
},
{
"epoch": 0.25,
"grad_norm": 0.19573761522769928,
"learning_rate": 8.813223795051828e-06,
"loss": 0.5298,
"step": 1148
},
{
"epoch": 0.25,
"grad_norm": 0.1662847250699997,
"learning_rate": 8.810966050571888e-06,
"loss": 0.533,
"step": 1149
},
{
"epoch": 0.25,
"grad_norm": 0.1873636543750763,
"learning_rate": 8.80870645032333e-06,
"loss": 0.4825,
"step": 1150
},
{
"epoch": 0.25,
"grad_norm": 0.1731029748916626,
"learning_rate": 8.806444995406475e-06,
"loss": 0.488,
"step": 1151
},
{
"epoch": 0.25,
"grad_norm": 0.18040412664413452,
"learning_rate": 8.804181686922555e-06,
"loss": 0.5282,
"step": 1152
},
{
"epoch": 0.25,
"grad_norm": 0.15593977272510529,
"learning_rate": 8.801916525973696e-06,
"loss": 0.5124,
"step": 1153
},
{
"epoch": 0.25,
"grad_norm": 0.15248659253120422,
"learning_rate": 8.799649513662926e-06,
"loss": 0.513,
"step": 1154
},
{
"epoch": 0.25,
"grad_norm": 0.14471983909606934,
"learning_rate": 8.797380651094182e-06,
"loss": 0.504,
"step": 1155
},
{
"epoch": 0.25,
"grad_norm": 0.1660238355398178,
"learning_rate": 8.795109939372298e-06,
"loss": 0.5266,
"step": 1156
},
{
"epoch": 0.25,
"grad_norm": 0.15838298201560974,
"learning_rate": 8.792837379603005e-06,
"loss": 0.5438,
"step": 1157
},
{
"epoch": 0.25,
"grad_norm": 0.17816348373889923,
"learning_rate": 8.79056297289294e-06,
"loss": 0.5428,
"step": 1158
},
{
"epoch": 0.25,
"grad_norm": 0.1319669485092163,
"learning_rate": 8.788286720349638e-06,
"loss": 0.5487,
"step": 1159
},
{
"epoch": 0.25,
"grad_norm": 0.14675050973892212,
"learning_rate": 8.786008623081526e-06,
"loss": 0.5409,
"step": 1160
},
{
"epoch": 0.25,
"grad_norm": 0.16564631462097168,
"learning_rate": 8.783728682197935e-06,
"loss": 0.5405,
"step": 1161
},
{
"epoch": 0.25,
"grad_norm": 0.1422412395477295,
"learning_rate": 8.781446898809101e-06,
"loss": 0.5069,
"step": 1162
},
{
"epoch": 0.25,
"grad_norm": 0.16499634087085724,
"learning_rate": 8.77916327402614e-06,
"loss": 0.5038,
"step": 1163
},
{
"epoch": 0.25,
"grad_norm": 0.19437891244888306,
"learning_rate": 8.776877808961082e-06,
"loss": 0.5249,
"step": 1164
},
{
"epoch": 0.25,
"grad_norm": 0.16480234265327454,
"learning_rate": 8.774590504726842e-06,
"loss": 0.5104,
"step": 1165
},
{
"epoch": 0.25,
"grad_norm": 0.12336334586143494,
"learning_rate": 8.772301362437233e-06,
"loss": 0.497,
"step": 1166
},
{
"epoch": 0.25,
"grad_norm": 0.19107873737812042,
"learning_rate": 8.770010383206967e-06,
"loss": 0.5441,
"step": 1167
},
{
"epoch": 0.25,
"grad_norm": 0.16102471947669983,
"learning_rate": 8.767717568151643e-06,
"loss": 0.4736,
"step": 1168
},
{
"epoch": 0.25,
"grad_norm": 0.14254657924175262,
"learning_rate": 8.765422918387764e-06,
"loss": 0.5339,
"step": 1169
},
{
"epoch": 0.25,
"grad_norm": 0.1567242443561554,
"learning_rate": 8.763126435032717e-06,
"loss": 0.5516,
"step": 1170
},
{
"epoch": 0.25,
"grad_norm": 0.16098615527153015,
"learning_rate": 8.760828119204787e-06,
"loss": 0.5642,
"step": 1171
},
{
"epoch": 0.25,
"grad_norm": 0.16631126403808594,
"learning_rate": 8.758527972023151e-06,
"loss": 0.4856,
"step": 1172
},
{
"epoch": 0.25,
"grad_norm": 0.15367335081100464,
"learning_rate": 8.756225994607877e-06,
"loss": 0.5066,
"step": 1173
},
{
"epoch": 0.25,
"grad_norm": 0.14037656784057617,
"learning_rate": 8.753922188079923e-06,
"loss": 0.5029,
"step": 1174
},
{
"epoch": 0.25,
"grad_norm": 0.15949761867523193,
"learning_rate": 8.75161655356114e-06,
"loss": 0.4636,
"step": 1175
},
{
"epoch": 0.25,
"grad_norm": 0.1654081493616104,
"learning_rate": 8.749309092174267e-06,
"loss": 0.5005,
"step": 1176
},
{
"epoch": 0.25,
"grad_norm": 0.2345263659954071,
"learning_rate": 8.746999805042932e-06,
"loss": 0.5147,
"step": 1177
},
{
"epoch": 0.25,
"grad_norm": 0.13465999066829681,
"learning_rate": 8.744688693291658e-06,
"loss": 0.4982,
"step": 1178
},
{
"epoch": 0.25,
"grad_norm": 0.1473112851381302,
"learning_rate": 8.74237575804585e-06,
"loss": 0.4857,
"step": 1179
},
{
"epoch": 0.25,
"grad_norm": 0.18562085926532745,
"learning_rate": 8.740061000431805e-06,
"loss": 0.505,
"step": 1180
},
{
"epoch": 0.25,
"grad_norm": 0.15015870332717896,
"learning_rate": 8.737744421576702e-06,
"loss": 0.5246,
"step": 1181
},
{
"epoch": 0.25,
"grad_norm": 0.16794438660144806,
"learning_rate": 8.735426022608611e-06,
"loss": 0.5393,
"step": 1182
},
{
"epoch": 0.25,
"grad_norm": 0.15591543912887573,
"learning_rate": 8.73310580465649e-06,
"loss": 0.4964,
"step": 1183
},
{
"epoch": 0.26,
"grad_norm": 0.2005312144756317,
"learning_rate": 8.73078376885018e-06,
"loss": 0.5,
"step": 1184
},
{
"epoch": 0.26,
"grad_norm": 0.15269523859024048,
"learning_rate": 8.728459916320406e-06,
"loss": 0.509,
"step": 1185
},
{
"epoch": 0.26,
"grad_norm": 0.14824025332927704,
"learning_rate": 8.726134248198782e-06,
"loss": 0.5186,
"step": 1186
},
{
"epoch": 0.26,
"grad_norm": 0.15085245668888092,
"learning_rate": 8.723806765617801e-06,
"loss": 0.4852,
"step": 1187
},
{
"epoch": 0.26,
"grad_norm": 0.1564967930316925,
"learning_rate": 8.721477469710845e-06,
"loss": 0.5095,
"step": 1188
},
{
"epoch": 0.26,
"grad_norm": 0.1731698215007782,
"learning_rate": 8.719146361612172e-06,
"loss": 0.5231,
"step": 1189
},
{
"epoch": 0.26,
"grad_norm": 0.18087385594844818,
"learning_rate": 8.71681344245693e-06,
"loss": 0.5556,
"step": 1190
},
{
"epoch": 0.26,
"grad_norm": 0.1844499558210373,
"learning_rate": 8.714478713381144e-06,
"loss": 0.5893,
"step": 1191
},
{
"epoch": 0.26,
"grad_norm": 0.12835489213466644,
"learning_rate": 8.712142175521723e-06,
"loss": 0.4653,
"step": 1192
},
{
"epoch": 0.26,
"grad_norm": 0.1417992115020752,
"learning_rate": 8.709803830016454e-06,
"loss": 0.5421,
"step": 1193
},
{
"epoch": 0.26,
"grad_norm": 0.13503408432006836,
"learning_rate": 8.707463678004004e-06,
"loss": 0.5036,
"step": 1194
},
{
"epoch": 0.26,
"grad_norm": 0.1597423255443573,
"learning_rate": 8.705121720623927e-06,
"loss": 0.5046,
"step": 1195
},
{
"epoch": 0.26,
"grad_norm": 0.1646643579006195,
"learning_rate": 8.702777959016647e-06,
"loss": 0.5126,
"step": 1196
},
{
"epoch": 0.26,
"grad_norm": 0.18008291721343994,
"learning_rate": 8.700432394323471e-06,
"loss": 0.5419,
"step": 1197
},
{
"epoch": 0.26,
"grad_norm": 0.14976496994495392,
"learning_rate": 8.698085027686581e-06,
"loss": 0.5095,
"step": 1198
},
{
"epoch": 0.26,
"grad_norm": 0.16157154738903046,
"learning_rate": 8.695735860249041e-06,
"loss": 0.5152,
"step": 1199
},
{
"epoch": 0.26,
"grad_norm": 0.16819888353347778,
"learning_rate": 8.69338489315479e-06,
"loss": 0.5401,
"step": 1200
},
{
"epoch": 0.26,
"grad_norm": 0.16953587532043457,
"learning_rate": 8.691032127548643e-06,
"loss": 0.5177,
"step": 1201
},
{
"epoch": 0.26,
"grad_norm": 0.15358132123947144,
"learning_rate": 8.68867756457629e-06,
"loss": 0.547,
"step": 1202
},
{
"epoch": 0.26,
"grad_norm": 0.13902026414871216,
"learning_rate": 8.686321205384296e-06,
"loss": 0.5487,
"step": 1203
},
{
"epoch": 0.26,
"grad_norm": 0.1606639176607132,
"learning_rate": 8.683963051120103e-06,
"loss": 0.4611,
"step": 1204
},
{
"epoch": 0.26,
"grad_norm": 0.14703510701656342,
"learning_rate": 8.681603102932026e-06,
"loss": 0.4999,
"step": 1205
},
{
"epoch": 0.26,
"grad_norm": 0.19730383157730103,
"learning_rate": 8.679241361969252e-06,
"loss": 0.4937,
"step": 1206
},
{
"epoch": 0.26,
"grad_norm": 0.1710227131843567,
"learning_rate": 8.676877829381843e-06,
"loss": 0.5255,
"step": 1207
},
{
"epoch": 0.26,
"grad_norm": 0.18335406482219696,
"learning_rate": 8.674512506320733e-06,
"loss": 0.603,
"step": 1208
},
{
"epoch": 0.26,
"grad_norm": 0.14981816709041595,
"learning_rate": 8.67214539393773e-06,
"loss": 0.4541,
"step": 1209
},
{
"epoch": 0.26,
"grad_norm": 0.2136390507221222,
"learning_rate": 8.669776493385506e-06,
"loss": 0.5327,
"step": 1210
},
{
"epoch": 0.26,
"grad_norm": 0.1298462301492691,
"learning_rate": 8.667405805817613e-06,
"loss": 0.5373,
"step": 1211
},
{
"epoch": 0.26,
"grad_norm": 0.1850888431072235,
"learning_rate": 8.665033332388466e-06,
"loss": 0.5459,
"step": 1212
},
{
"epoch": 0.26,
"grad_norm": 0.19591952860355377,
"learning_rate": 8.662659074253355e-06,
"loss": 0.5137,
"step": 1213
},
{
"epoch": 0.26,
"grad_norm": 0.13489966094493866,
"learning_rate": 8.660283032568435e-06,
"loss": 0.5468,
"step": 1214
},
{
"epoch": 0.26,
"grad_norm": 0.15992878377437592,
"learning_rate": 8.657905208490732e-06,
"loss": 0.5045,
"step": 1215
},
{
"epoch": 0.26,
"grad_norm": 0.16097012162208557,
"learning_rate": 8.655525603178137e-06,
"loss": 0.5239,
"step": 1216
},
{
"epoch": 0.26,
"grad_norm": 0.17989301681518555,
"learning_rate": 8.653144217789414e-06,
"loss": 0.5239,
"step": 1217
},
{
"epoch": 0.26,
"grad_norm": 0.1628495454788208,
"learning_rate": 8.650761053484188e-06,
"loss": 0.5315,
"step": 1218
},
{
"epoch": 0.26,
"grad_norm": 0.13146936893463135,
"learning_rate": 8.648376111422954e-06,
"loss": 0.5351,
"step": 1219
},
{
"epoch": 0.26,
"grad_norm": 0.212355837225914,
"learning_rate": 8.645989392767068e-06,
"loss": 0.5092,
"step": 1220
},
{
"epoch": 0.26,
"grad_norm": 0.11173799633979797,
"learning_rate": 8.643600898678758e-06,
"loss": 0.5176,
"step": 1221
},
{
"epoch": 0.26,
"grad_norm": 0.1470513939857483,
"learning_rate": 8.641210630321115e-06,
"loss": 0.5159,
"step": 1222
},
{
"epoch": 0.26,
"grad_norm": 0.1474858522415161,
"learning_rate": 8.638818588858084e-06,
"loss": 0.5103,
"step": 1223
},
{
"epoch": 0.26,
"grad_norm": 0.13153494894504547,
"learning_rate": 8.636424775454489e-06,
"loss": 0.5596,
"step": 1224
},
{
"epoch": 0.26,
"grad_norm": 0.1469038426876068,
"learning_rate": 8.634029191276003e-06,
"loss": 0.5363,
"step": 1225
},
{
"epoch": 0.26,
"grad_norm": 0.15724244713783264,
"learning_rate": 8.631631837489173e-06,
"loss": 0.5318,
"step": 1226
},
{
"epoch": 0.26,
"grad_norm": 0.16701483726501465,
"learning_rate": 8.6292327152614e-06,
"loss": 0.5219,
"step": 1227
},
{
"epoch": 0.26,
"grad_norm": 0.1822412610054016,
"learning_rate": 8.626831825760946e-06,
"loss": 0.5067,
"step": 1228
},
{
"epoch": 0.26,
"grad_norm": 0.14838603138923645,
"learning_rate": 8.62442917015694e-06,
"loss": 0.5298,
"step": 1229
},
{
"epoch": 0.26,
"grad_norm": 0.13148529827594757,
"learning_rate": 8.622024749619363e-06,
"loss": 0.4947,
"step": 1230
},
{
"epoch": 0.27,
"grad_norm": 0.1674978882074356,
"learning_rate": 8.619618565319063e-06,
"loss": 0.5674,
"step": 1231
},
{
"epoch": 0.27,
"grad_norm": 0.2056237906217575,
"learning_rate": 8.61721061842774e-06,
"loss": 0.4931,
"step": 1232
},
{
"epoch": 0.27,
"grad_norm": 0.1400204300880432,
"learning_rate": 8.614800910117958e-06,
"loss": 0.543,
"step": 1233
},
{
"epoch": 0.27,
"grad_norm": 0.1407189816236496,
"learning_rate": 8.612389441563136e-06,
"loss": 0.5108,
"step": 1234
},
{
"epoch": 0.27,
"grad_norm": 0.1611378788948059,
"learning_rate": 8.60997621393755e-06,
"loss": 0.4961,
"step": 1235
},
{
"epoch": 0.27,
"grad_norm": 0.1521531641483307,
"learning_rate": 8.60756122841633e-06,
"loss": 0.4755,
"step": 1236
},
{
"epoch": 0.27,
"grad_norm": 0.14714032411575317,
"learning_rate": 8.60514448617547e-06,
"loss": 0.5365,
"step": 1237
},
{
"epoch": 0.27,
"grad_norm": 0.17980900406837463,
"learning_rate": 8.602725988391814e-06,
"loss": 0.5424,
"step": 1238
},
{
"epoch": 0.27,
"grad_norm": 0.16438312828540802,
"learning_rate": 8.600305736243057e-06,
"loss": 0.5523,
"step": 1239
},
{
"epoch": 0.27,
"grad_norm": 0.1427246630191803,
"learning_rate": 8.597883730907757e-06,
"loss": 0.5091,
"step": 1240
},
{
"epoch": 0.27,
"grad_norm": 0.1325269341468811,
"learning_rate": 8.59545997356532e-06,
"loss": 0.481,
"step": 1241
},
{
"epoch": 0.27,
"grad_norm": 0.17241443693637848,
"learning_rate": 8.593034465396007e-06,
"loss": 0.5071,
"step": 1242
},
{
"epoch": 0.27,
"grad_norm": 0.14038234949111938,
"learning_rate": 8.590607207580927e-06,
"loss": 0.5394,
"step": 1243
},
{
"epoch": 0.27,
"grad_norm": 0.20857305824756622,
"learning_rate": 8.588178201302052e-06,
"loss": 0.4944,
"step": 1244
},
{
"epoch": 0.27,
"grad_norm": 0.1448458433151245,
"learning_rate": 8.585747447742194e-06,
"loss": 0.52,
"step": 1245
},
{
"epoch": 0.27,
"grad_norm": 0.17979028820991516,
"learning_rate": 8.583314948085023e-06,
"loss": 0.5241,
"step": 1246
},
{
"epoch": 0.27,
"grad_norm": 0.16653123497962952,
"learning_rate": 8.580880703515052e-06,
"loss": 0.5061,
"step": 1247
},
{
"epoch": 0.27,
"grad_norm": 0.2052346169948578,
"learning_rate": 8.578444715217652e-06,
"loss": 0.471,
"step": 1248
},
{
"epoch": 0.27,
"grad_norm": 0.1382577270269394,
"learning_rate": 8.576006984379042e-06,
"loss": 0.4621,
"step": 1249
},
{
"epoch": 0.27,
"grad_norm": 0.17501065135002136,
"learning_rate": 8.57356751218628e-06,
"loss": 0.5761,
"step": 1250
},
{
"epoch": 0.27,
"grad_norm": 0.14629067480564117,
"learning_rate": 8.571126299827284e-06,
"loss": 0.511,
"step": 1251
},
{
"epoch": 0.27,
"grad_norm": 0.16205544769763947,
"learning_rate": 8.568683348490817e-06,
"loss": 0.5259,
"step": 1252
},
{
"epoch": 0.27,
"grad_norm": 0.14176106452941895,
"learning_rate": 8.566238659366477e-06,
"loss": 0.5333,
"step": 1253
},
{
"epoch": 0.27,
"grad_norm": 0.27345001697540283,
"learning_rate": 8.563792233644725e-06,
"loss": 0.5117,
"step": 1254
},
{
"epoch": 0.27,
"grad_norm": 0.16053150594234467,
"learning_rate": 8.561344072516858e-06,
"loss": 0.5015,
"step": 1255
},
{
"epoch": 0.27,
"grad_norm": 0.19150519371032715,
"learning_rate": 8.558894177175019e-06,
"loss": 0.5326,
"step": 1256
},
{
"epoch": 0.27,
"grad_norm": 0.14895778894424438,
"learning_rate": 8.556442548812198e-06,
"loss": 0.5247,
"step": 1257
},
{
"epoch": 0.27,
"grad_norm": 0.16230621933937073,
"learning_rate": 8.553989188622228e-06,
"loss": 0.5634,
"step": 1258
},
{
"epoch": 0.27,
"grad_norm": 0.15796539187431335,
"learning_rate": 8.55153409779978e-06,
"loss": 0.5686,
"step": 1259
},
{
"epoch": 0.27,
"grad_norm": 0.15374596416950226,
"learning_rate": 8.549077277540379e-06,
"loss": 0.5287,
"step": 1260
},
{
"epoch": 0.27,
"grad_norm": 0.14890524744987488,
"learning_rate": 8.546618729040382e-06,
"loss": 0.5112,
"step": 1261
},
{
"epoch": 0.27,
"grad_norm": 0.1993798166513443,
"learning_rate": 8.544158453496992e-06,
"loss": 0.5229,
"step": 1262
},
{
"epoch": 0.27,
"grad_norm": 0.16211991012096405,
"learning_rate": 8.541696452108253e-06,
"loss": 0.5332,
"step": 1263
},
{
"epoch": 0.27,
"grad_norm": 0.2108837217092514,
"learning_rate": 8.539232726073046e-06,
"loss": 0.5223,
"step": 1264
},
{
"epoch": 0.27,
"grad_norm": 0.14320197701454163,
"learning_rate": 8.536767276591098e-06,
"loss": 0.4906,
"step": 1265
},
{
"epoch": 0.27,
"grad_norm": 0.14289528131484985,
"learning_rate": 8.53430010486297e-06,
"loss": 0.5253,
"step": 1266
},
{
"epoch": 0.27,
"grad_norm": 0.1269850730895996,
"learning_rate": 8.531831212090062e-06,
"loss": 0.5145,
"step": 1267
},
{
"epoch": 0.27,
"grad_norm": 0.18504297733306885,
"learning_rate": 8.529360599474616e-06,
"loss": 0.4976,
"step": 1268
},
{
"epoch": 0.27,
"grad_norm": 0.13720788061618805,
"learning_rate": 8.52688826821971e-06,
"loss": 0.4952,
"step": 1269
},
{
"epoch": 0.27,
"grad_norm": 0.2334408462047577,
"learning_rate": 8.524414219529253e-06,
"loss": 0.5416,
"step": 1270
},
{
"epoch": 0.27,
"grad_norm": 0.21838751435279846,
"learning_rate": 8.521938454608e-06,
"loss": 0.5012,
"step": 1271
},
{
"epoch": 0.27,
"grad_norm": 0.143874391913414,
"learning_rate": 8.519460974661533e-06,
"loss": 0.5323,
"step": 1272
},
{
"epoch": 0.27,
"grad_norm": 0.14506854116916656,
"learning_rate": 8.516981780896276e-06,
"loss": 0.5148,
"step": 1273
},
{
"epoch": 0.27,
"grad_norm": 0.1657627373933792,
"learning_rate": 8.514500874519483e-06,
"loss": 0.5507,
"step": 1274
},
{
"epoch": 0.27,
"grad_norm": 0.15067879855632782,
"learning_rate": 8.512018256739242e-06,
"loss": 0.4994,
"step": 1275
},
{
"epoch": 0.27,
"grad_norm": 0.1645599901676178,
"learning_rate": 8.509533928764482e-06,
"loss": 0.5025,
"step": 1276
},
{
"epoch": 0.28,
"grad_norm": 0.14725331962108612,
"learning_rate": 8.507047891804951e-06,
"loss": 0.5635,
"step": 1277
},
{
"epoch": 0.28,
"grad_norm": 0.16245393455028534,
"learning_rate": 8.50456014707124e-06,
"loss": 0.4446,
"step": 1278
},
{
"epoch": 0.28,
"grad_norm": 0.14229734241962433,
"learning_rate": 8.502070695774771e-06,
"loss": 0.5043,
"step": 1279
},
{
"epoch": 0.28,
"grad_norm": 0.20700879395008087,
"learning_rate": 8.499579539127794e-06,
"loss": 0.487,
"step": 1280
},
{
"epoch": 0.28,
"grad_norm": 0.1793096512556076,
"learning_rate": 8.497086678343385e-06,
"loss": 0.5082,
"step": 1281
},
{
"epoch": 0.28,
"grad_norm": 0.14241085946559906,
"learning_rate": 8.494592114635458e-06,
"loss": 0.5334,
"step": 1282
},
{
"epoch": 0.28,
"grad_norm": 0.1370537132024765,
"learning_rate": 8.492095849218756e-06,
"loss": 0.5242,
"step": 1283
},
{
"epoch": 0.28,
"grad_norm": 0.1460958868265152,
"learning_rate": 8.489597883308844e-06,
"loss": 0.5325,
"step": 1284
},
{
"epoch": 0.28,
"grad_norm": 0.18947859108448029,
"learning_rate": 8.487098218122119e-06,
"loss": 0.5344,
"step": 1285
},
{
"epoch": 0.28,
"grad_norm": 0.2026044875383377,
"learning_rate": 8.484596854875806e-06,
"loss": 0.5627,
"step": 1286
},
{
"epoch": 0.28,
"grad_norm": 0.13377788662910461,
"learning_rate": 8.482093794787956e-06,
"loss": 0.5525,
"step": 1287
},
{
"epoch": 0.28,
"grad_norm": 0.22986631095409393,
"learning_rate": 8.479589039077446e-06,
"loss": 0.5288,
"step": 1288
},
{
"epoch": 0.28,
"grad_norm": 0.17068606615066528,
"learning_rate": 8.47708258896398e-06,
"loss": 0.5352,
"step": 1289
},
{
"epoch": 0.28,
"grad_norm": 0.15582841634750366,
"learning_rate": 8.474574445668085e-06,
"loss": 0.5475,
"step": 1290
},
{
"epoch": 0.28,
"grad_norm": 0.19104814529418945,
"learning_rate": 8.472064610411115e-06,
"loss": 0.5225,
"step": 1291
},
{
"epoch": 0.28,
"grad_norm": 0.12952920794487,
"learning_rate": 8.469553084415247e-06,
"loss": 0.4927,
"step": 1292
},
{
"epoch": 0.28,
"grad_norm": 0.32774683833122253,
"learning_rate": 8.467039868903477e-06,
"loss": 0.5286,
"step": 1293
},
{
"epoch": 0.28,
"grad_norm": 0.16002535820007324,
"learning_rate": 8.464524965099632e-06,
"loss": 0.5124,
"step": 1294
},
{
"epoch": 0.28,
"grad_norm": 0.15826278924942017,
"learning_rate": 8.462008374228356e-06,
"loss": 0.5502,
"step": 1295
},
{
"epoch": 0.28,
"grad_norm": 0.1503647416830063,
"learning_rate": 8.459490097515114e-06,
"loss": 0.5833,
"step": 1296
},
{
"epoch": 0.28,
"grad_norm": 0.18131448328495026,
"learning_rate": 8.456970136186193e-06,
"loss": 0.4606,
"step": 1297
},
{
"epoch": 0.28,
"grad_norm": 0.16622257232666016,
"learning_rate": 8.454448491468702e-06,
"loss": 0.5207,
"step": 1298
},
{
"epoch": 0.28,
"grad_norm": 0.16979950666427612,
"learning_rate": 8.451925164590568e-06,
"loss": 0.5655,
"step": 1299
},
{
"epoch": 0.28,
"grad_norm": 0.19531172513961792,
"learning_rate": 8.449400156780536e-06,
"loss": 0.4779,
"step": 1300
},
{
"epoch": 0.28,
"grad_norm": 0.17314670979976654,
"learning_rate": 8.44687346926817e-06,
"loss": 0.5046,
"step": 1301
},
{
"epoch": 0.28,
"grad_norm": 0.1429021954536438,
"learning_rate": 8.444345103283858e-06,
"loss": 0.527,
"step": 1302
},
{
"epoch": 0.28,
"grad_norm": 0.19530290365219116,
"learning_rate": 8.441815060058795e-06,
"loss": 0.518,
"step": 1303
},
{
"epoch": 0.28,
"grad_norm": 0.1742294281721115,
"learning_rate": 8.439283340825002e-06,
"loss": 0.5443,
"step": 1304
},
{
"epoch": 0.28,
"grad_norm": 0.18429934978485107,
"learning_rate": 8.436749946815308e-06,
"loss": 0.5474,
"step": 1305
},
{
"epoch": 0.28,
"grad_norm": 0.1543246954679489,
"learning_rate": 8.434214879263365e-06,
"loss": 0.5142,
"step": 1306
},
{
"epoch": 0.28,
"grad_norm": 0.16444545984268188,
"learning_rate": 8.431678139403635e-06,
"loss": 0.5534,
"step": 1307
},
{
"epoch": 0.28,
"grad_norm": 0.19701968133449554,
"learning_rate": 8.429139728471395e-06,
"loss": 0.5156,
"step": 1308
},
{
"epoch": 0.28,
"grad_norm": 0.14688943326473236,
"learning_rate": 8.426599647702738e-06,
"loss": 0.5208,
"step": 1309
},
{
"epoch": 0.28,
"grad_norm": 0.19136419892311096,
"learning_rate": 8.424057898334569e-06,
"loss": 0.6148,
"step": 1310
},
{
"epoch": 0.28,
"grad_norm": 0.17055533826351166,
"learning_rate": 8.421514481604605e-06,
"loss": 0.5107,
"step": 1311
},
{
"epoch": 0.28,
"grad_norm": 0.16385668516159058,
"learning_rate": 8.418969398751375e-06,
"loss": 0.502,
"step": 1312
},
{
"epoch": 0.28,
"grad_norm": 0.17869453132152557,
"learning_rate": 8.41642265101422e-06,
"loss": 0.5464,
"step": 1313
},
{
"epoch": 0.28,
"grad_norm": 0.14309388399124146,
"learning_rate": 8.413874239633291e-06,
"loss": 0.5585,
"step": 1314
},
{
"epoch": 0.28,
"grad_norm": 0.16163702309131622,
"learning_rate": 8.41132416584955e-06,
"loss": 0.553,
"step": 1315
},
{
"epoch": 0.28,
"grad_norm": 0.15878815948963165,
"learning_rate": 8.408772430904768e-06,
"loss": 0.5359,
"step": 1316
},
{
"epoch": 0.28,
"grad_norm": 0.14803734421730042,
"learning_rate": 8.406219036041523e-06,
"loss": 0.5177,
"step": 1317
},
{
"epoch": 0.28,
"grad_norm": 0.16167186200618744,
"learning_rate": 8.403663982503205e-06,
"loss": 0.5106,
"step": 1318
},
{
"epoch": 0.28,
"grad_norm": 0.14223089814186096,
"learning_rate": 8.40110727153401e-06,
"loss": 0.4768,
"step": 1319
},
{
"epoch": 0.28,
"grad_norm": 0.1392257660627365,
"learning_rate": 8.398548904378938e-06,
"loss": 0.4928,
"step": 1320
},
{
"epoch": 0.28,
"grad_norm": 0.1703733652830124,
"learning_rate": 8.395988882283803e-06,
"loss": 0.462,
"step": 1321
},
{
"epoch": 0.28,
"grad_norm": 0.14999133348464966,
"learning_rate": 8.393427206495217e-06,
"loss": 0.5035,
"step": 1322
},
{
"epoch": 0.28,
"grad_norm": 0.18849503993988037,
"learning_rate": 8.390863878260602e-06,
"loss": 0.5025,
"step": 1323
},
{
"epoch": 0.29,
"grad_norm": 0.2667754888534546,
"learning_rate": 8.388298898828182e-06,
"loss": 0.517,
"step": 1324
},
{
"epoch": 0.29,
"grad_norm": 0.1366441398859024,
"learning_rate": 8.385732269446987e-06,
"loss": 0.4938,
"step": 1325
},
{
"epoch": 0.29,
"grad_norm": 0.16878017783164978,
"learning_rate": 8.383163991366852e-06,
"loss": 0.5057,
"step": 1326
},
{
"epoch": 0.29,
"grad_norm": 0.14408189058303833,
"learning_rate": 8.38059406583841e-06,
"loss": 0.5197,
"step": 1327
},
{
"epoch": 0.29,
"grad_norm": 0.14448203146457672,
"learning_rate": 8.378022494113099e-06,
"loss": 0.5289,
"step": 1328
},
{
"epoch": 0.29,
"grad_norm": 0.1776053011417389,
"learning_rate": 8.37544927744316e-06,
"loss": 0.529,
"step": 1329
},
{
"epoch": 0.29,
"grad_norm": 0.1904003769159317,
"learning_rate": 8.372874417081632e-06,
"loss": 0.5253,
"step": 1330
},
{
"epoch": 0.29,
"grad_norm": 0.15336477756500244,
"learning_rate": 8.370297914282354e-06,
"loss": 0.5307,
"step": 1331
},
{
"epoch": 0.29,
"grad_norm": 0.1891254037618637,
"learning_rate": 8.367719770299972e-06,
"loss": 0.5089,
"step": 1332
},
{
"epoch": 0.29,
"grad_norm": 0.22274090349674225,
"learning_rate": 8.36513998638992e-06,
"loss": 0.5328,
"step": 1333
},
{
"epoch": 0.29,
"grad_norm": 0.1466333568096161,
"learning_rate": 8.36255856380844e-06,
"loss": 0.5408,
"step": 1334
},
{
"epoch": 0.29,
"grad_norm": 0.15075673162937164,
"learning_rate": 8.359975503812569e-06,
"loss": 0.5402,
"step": 1335
},
{
"epoch": 0.29,
"grad_norm": 0.1457648128271103,
"learning_rate": 8.35739080766014e-06,
"loss": 0.5256,
"step": 1336
},
{
"epoch": 0.29,
"grad_norm": 0.1647207885980606,
"learning_rate": 8.35480447660978e-06,
"loss": 0.5204,
"step": 1337
},
{
"epoch": 0.29,
"grad_norm": 0.16034474968910217,
"learning_rate": 8.352216511920921e-06,
"loss": 0.5282,
"step": 1338
},
{
"epoch": 0.29,
"grad_norm": 0.1303335428237915,
"learning_rate": 8.349626914853781e-06,
"loss": 0.4993,
"step": 1339
},
{
"epoch": 0.29,
"grad_norm": 0.17350099980831146,
"learning_rate": 8.34703568666938e-06,
"loss": 0.6363,
"step": 1340
},
{
"epoch": 0.29,
"grad_norm": 0.16359736025333405,
"learning_rate": 8.344442828629526e-06,
"loss": 0.5418,
"step": 1341
},
{
"epoch": 0.29,
"grad_norm": 0.1771382838487625,
"learning_rate": 8.341848341996828e-06,
"loss": 0.5243,
"step": 1342
},
{
"epoch": 0.29,
"grad_norm": 0.14461980760097504,
"learning_rate": 8.33925222803468e-06,
"loss": 0.5308,
"step": 1343
},
{
"epoch": 0.29,
"grad_norm": 0.19642101228237152,
"learning_rate": 8.336654488007277e-06,
"loss": 0.5189,
"step": 1344
},
{
"epoch": 0.29,
"grad_norm": 0.18800689280033112,
"learning_rate": 8.334055123179596e-06,
"loss": 0.5177,
"step": 1345
},
{
"epoch": 0.29,
"grad_norm": 0.20820565521717072,
"learning_rate": 8.331454134817414e-06,
"loss": 0.5033,
"step": 1346
},
{
"epoch": 0.29,
"grad_norm": 0.15935355424880981,
"learning_rate": 8.328851524187292e-06,
"loss": 0.4901,
"step": 1347
},
{
"epoch": 0.29,
"grad_norm": 0.15410637855529785,
"learning_rate": 8.326247292556588e-06,
"loss": 0.5402,
"step": 1348
},
{
"epoch": 0.29,
"grad_norm": 0.21510785818099976,
"learning_rate": 8.323641441193441e-06,
"loss": 0.5414,
"step": 1349
},
{
"epoch": 0.29,
"grad_norm": 0.20484770834445953,
"learning_rate": 8.321033971366788e-06,
"loss": 0.4995,
"step": 1350
},
{
"epoch": 0.29,
"grad_norm": 0.15138699114322662,
"learning_rate": 8.318424884346347e-06,
"loss": 0.5191,
"step": 1351
},
{
"epoch": 0.29,
"grad_norm": 0.1576775163412094,
"learning_rate": 8.315814181402623e-06,
"loss": 0.5358,
"step": 1352
},
{
"epoch": 0.29,
"grad_norm": 0.15024110674858093,
"learning_rate": 8.313201863806915e-06,
"loss": 0.4613,
"step": 1353
},
{
"epoch": 0.29,
"grad_norm": 0.15514235198497772,
"learning_rate": 8.310587932831302e-06,
"loss": 0.4951,
"step": 1354
},
{
"epoch": 0.29,
"grad_norm": 0.20852284133434296,
"learning_rate": 8.30797238974865e-06,
"loss": 0.5085,
"step": 1355
},
{
"epoch": 0.29,
"grad_norm": 0.15601487457752228,
"learning_rate": 8.305355235832611e-06,
"loss": 0.5467,
"step": 1356
},
{
"epoch": 0.29,
"grad_norm": 0.22823049128055573,
"learning_rate": 8.30273647235762e-06,
"loss": 0.5444,
"step": 1357
},
{
"epoch": 0.29,
"grad_norm": 0.17297740280628204,
"learning_rate": 8.300116100598899e-06,
"loss": 0.4745,
"step": 1358
},
{
"epoch": 0.29,
"grad_norm": 0.16721418499946594,
"learning_rate": 8.297494121832449e-06,
"loss": 0.5331,
"step": 1359
},
{
"epoch": 0.29,
"grad_norm": 0.20764422416687012,
"learning_rate": 8.294870537335054e-06,
"loss": 0.5123,
"step": 1360
},
{
"epoch": 0.29,
"grad_norm": 0.12124624103307724,
"learning_rate": 8.292245348384285e-06,
"loss": 0.4942,
"step": 1361
},
{
"epoch": 0.29,
"grad_norm": 0.18373292684555054,
"learning_rate": 8.28961855625849e-06,
"loss": 0.6003,
"step": 1362
},
{
"epoch": 0.29,
"grad_norm": 0.15665894746780396,
"learning_rate": 8.286990162236796e-06,
"loss": 0.5199,
"step": 1363
},
{
"epoch": 0.29,
"grad_norm": 0.18932463228702545,
"learning_rate": 8.284360167599113e-06,
"loss": 0.5577,
"step": 1364
},
{
"epoch": 0.29,
"grad_norm": 0.14339394867420197,
"learning_rate": 8.28172857362613e-06,
"loss": 0.5319,
"step": 1365
},
{
"epoch": 0.29,
"grad_norm": 0.16630741953849792,
"learning_rate": 8.279095381599318e-06,
"loss": 0.506,
"step": 1366
},
{
"epoch": 0.29,
"grad_norm": 0.15607817471027374,
"learning_rate": 8.27646059280092e-06,
"loss": 0.5348,
"step": 1367
},
{
"epoch": 0.29,
"grad_norm": 0.1827673465013504,
"learning_rate": 8.273824208513956e-06,
"loss": 0.5234,
"step": 1368
},
{
"epoch": 0.29,
"grad_norm": 0.18514670431613922,
"learning_rate": 8.27118623002223e-06,
"loss": 0.4667,
"step": 1369
},
{
"epoch": 0.3,
"grad_norm": 0.14588609337806702,
"learning_rate": 8.268546658610319e-06,
"loss": 0.4641,
"step": 1370
},
{
"epoch": 0.3,
"grad_norm": 0.14752966165542603,
"learning_rate": 8.265905495563573e-06,
"loss": 0.4737,
"step": 1371
},
{
"epoch": 0.3,
"grad_norm": 0.18035411834716797,
"learning_rate": 8.26326274216812e-06,
"loss": 0.5087,
"step": 1372
},
{
"epoch": 0.3,
"grad_norm": 0.14755289256572723,
"learning_rate": 8.260618399710864e-06,
"loss": 0.5454,
"step": 1373
},
{
"epoch": 0.3,
"grad_norm": 0.18107686936855316,
"learning_rate": 8.257972469479478e-06,
"loss": 0.469,
"step": 1374
},
{
"epoch": 0.3,
"grad_norm": 0.13992854952812195,
"learning_rate": 8.255324952762413e-06,
"loss": 0.4561,
"step": 1375
},
{
"epoch": 0.3,
"grad_norm": 0.18599078059196472,
"learning_rate": 8.252675850848886e-06,
"loss": 0.4449,
"step": 1376
},
{
"epoch": 0.3,
"grad_norm": 0.14460837841033936,
"learning_rate": 8.250025165028897e-06,
"loss": 0.5144,
"step": 1377
},
{
"epoch": 0.3,
"grad_norm": 0.15791229903697968,
"learning_rate": 8.247372896593203e-06,
"loss": 0.5268,
"step": 1378
},
{
"epoch": 0.3,
"grad_norm": 0.15533843636512756,
"learning_rate": 8.244719046833342e-06,
"loss": 0.5176,
"step": 1379
},
{
"epoch": 0.3,
"grad_norm": 0.16106192767620087,
"learning_rate": 8.24206361704162e-06,
"loss": 0.5609,
"step": 1380
},
{
"epoch": 0.3,
"grad_norm": 0.1757259964942932,
"learning_rate": 8.239406608511113e-06,
"loss": 0.5459,
"step": 1381
},
{
"epoch": 0.3,
"grad_norm": 0.14974632859230042,
"learning_rate": 8.236748022535662e-06,
"loss": 0.5193,
"step": 1382
},
{
"epoch": 0.3,
"grad_norm": 0.16588665544986725,
"learning_rate": 8.23408786040988e-06,
"loss": 0.5399,
"step": 1383
},
{
"epoch": 0.3,
"grad_norm": 0.18392562866210938,
"learning_rate": 8.231426123429143e-06,
"loss": 0.5266,
"step": 1384
},
{
"epoch": 0.3,
"grad_norm": 0.15321050584316254,
"learning_rate": 8.2287628128896e-06,
"loss": 0.5206,
"step": 1385
},
{
"epoch": 0.3,
"grad_norm": 0.25465235114097595,
"learning_rate": 8.226097930088162e-06,
"loss": 0.5679,
"step": 1386
},
{
"epoch": 0.3,
"grad_norm": 0.16098381578922272,
"learning_rate": 8.223431476322508e-06,
"loss": 0.501,
"step": 1387
},
{
"epoch": 0.3,
"grad_norm": 0.18890248239040375,
"learning_rate": 8.220763452891078e-06,
"loss": 0.5524,
"step": 1388
},
{
"epoch": 0.3,
"grad_norm": 0.19365254044532776,
"learning_rate": 8.218093861093082e-06,
"loss": 0.4858,
"step": 1389
},
{
"epoch": 0.3,
"grad_norm": 0.13747772574424744,
"learning_rate": 8.215422702228487e-06,
"loss": 0.5109,
"step": 1390
},
{
"epoch": 0.3,
"grad_norm": 0.1644936501979828,
"learning_rate": 8.212749977598032e-06,
"loss": 0.4996,
"step": 1391
},
{
"epoch": 0.3,
"grad_norm": 0.17819000780582428,
"learning_rate": 8.210075688503209e-06,
"loss": 0.5312,
"step": 1392
},
{
"epoch": 0.3,
"grad_norm": 0.15765920281410217,
"learning_rate": 8.207399836246278e-06,
"loss": 0.5171,
"step": 1393
},
{
"epoch": 0.3,
"grad_norm": 0.20357385277748108,
"learning_rate": 8.20472242213026e-06,
"loss": 0.5364,
"step": 1394
},
{
"epoch": 0.3,
"grad_norm": 0.15080830454826355,
"learning_rate": 8.202043447458934e-06,
"loss": 0.5169,
"step": 1395
},
{
"epoch": 0.3,
"grad_norm": 0.15993140637874603,
"learning_rate": 8.199362913536837e-06,
"loss": 0.6155,
"step": 1396
},
{
"epoch": 0.3,
"grad_norm": 0.18161435425281525,
"learning_rate": 8.19668082166927e-06,
"loss": 0.5493,
"step": 1397
},
{
"epoch": 0.3,
"grad_norm": 0.1412186175584793,
"learning_rate": 8.193997173162293e-06,
"loss": 0.5242,
"step": 1398
},
{
"epoch": 0.3,
"grad_norm": 0.15259157121181488,
"learning_rate": 8.19131196932272e-06,
"loss": 0.5644,
"step": 1399
},
{
"epoch": 0.3,
"grad_norm": 0.2190113365650177,
"learning_rate": 8.188625211458123e-06,
"loss": 0.541,
"step": 1400
},
{
"epoch": 0.3,
"grad_norm": 0.17318737506866455,
"learning_rate": 8.185936900876834e-06,
"loss": 0.5085,
"step": 1401
},
{
"epoch": 0.3,
"grad_norm": 0.16196967661380768,
"learning_rate": 8.183247038887937e-06,
"loss": 0.485,
"step": 1402
},
{
"epoch": 0.3,
"grad_norm": 0.19770100712776184,
"learning_rate": 8.180555626801274e-06,
"loss": 0.5142,
"step": 1403
},
{
"epoch": 0.3,
"grad_norm": 0.1743081659078598,
"learning_rate": 8.177862665927445e-06,
"loss": 0.565,
"step": 1404
},
{
"epoch": 0.3,
"grad_norm": 0.18734456598758698,
"learning_rate": 8.175168157577795e-06,
"loss": 0.5631,
"step": 1405
},
{
"epoch": 0.3,
"grad_norm": 0.15591241419315338,
"learning_rate": 8.17247210306443e-06,
"loss": 0.4886,
"step": 1406
},
{
"epoch": 0.3,
"grad_norm": 0.20416924357414246,
"learning_rate": 8.169774503700209e-06,
"loss": 0.5232,
"step": 1407
},
{
"epoch": 0.3,
"grad_norm": 0.1668728142976761,
"learning_rate": 8.167075360798739e-06,
"loss": 0.5058,
"step": 1408
},
{
"epoch": 0.3,
"grad_norm": 0.1554676592350006,
"learning_rate": 8.164374675674382e-06,
"loss": 0.5154,
"step": 1409
},
{
"epoch": 0.3,
"grad_norm": 0.2015198916196823,
"learning_rate": 8.161672449642248e-06,
"loss": 0.482,
"step": 1410
},
{
"epoch": 0.3,
"grad_norm": 0.13508014380931854,
"learning_rate": 8.158968684018202e-06,
"loss": 0.5501,
"step": 1411
},
{
"epoch": 0.3,
"grad_norm": 0.18742331862449646,
"learning_rate": 8.156263380118855e-06,
"loss": 0.5439,
"step": 1412
},
{
"epoch": 0.3,
"grad_norm": 0.13899442553520203,
"learning_rate": 8.153556539261566e-06,
"loss": 0.4965,
"step": 1413
},
{
"epoch": 0.3,
"grad_norm": 0.15461724996566772,
"learning_rate": 8.150848162764448e-06,
"loss": 0.5158,
"step": 1414
},
{
"epoch": 0.3,
"grad_norm": 0.1699683964252472,
"learning_rate": 8.148138251946355e-06,
"loss": 0.5345,
"step": 1415
},
{
"epoch": 0.31,
"grad_norm": 0.1647995263338089,
"learning_rate": 8.145426808126894e-06,
"loss": 0.5417,
"step": 1416
},
{
"epoch": 0.31,
"grad_norm": 0.15304109454154968,
"learning_rate": 8.142713832626412e-06,
"loss": 0.5546,
"step": 1417
},
{
"epoch": 0.31,
"grad_norm": 0.12711341679096222,
"learning_rate": 8.139999326766011e-06,
"loss": 0.5176,
"step": 1418
},
{
"epoch": 0.31,
"grad_norm": 0.15692314505577087,
"learning_rate": 8.137283291867527e-06,
"loss": 0.4648,
"step": 1419
},
{
"epoch": 0.31,
"grad_norm": 0.16730400919914246,
"learning_rate": 8.134565729253554e-06,
"loss": 0.5099,
"step": 1420
},
{
"epoch": 0.31,
"grad_norm": 0.15150144696235657,
"learning_rate": 8.131846640247415e-06,
"loss": 0.5261,
"step": 1421
},
{
"epoch": 0.31,
"grad_norm": 0.25064417719841003,
"learning_rate": 8.129126026173189e-06,
"loss": 0.5097,
"step": 1422
},
{
"epoch": 0.31,
"grad_norm": 0.1557064801454544,
"learning_rate": 8.126403888355689e-06,
"loss": 0.4951,
"step": 1423
},
{
"epoch": 0.31,
"grad_norm": 0.17393703758716583,
"learning_rate": 8.123680228120474e-06,
"loss": 0.5257,
"step": 1424
},
{
"epoch": 0.31,
"grad_norm": 0.1844862401485443,
"learning_rate": 8.120955046793847e-06,
"loss": 0.5361,
"step": 1425
},
{
"epoch": 0.31,
"grad_norm": 0.17331448197364807,
"learning_rate": 8.118228345702843e-06,
"loss": 0.5718,
"step": 1426
},
{
"epoch": 0.31,
"grad_norm": 0.19549396634101868,
"learning_rate": 8.115500126175246e-06,
"loss": 0.5322,
"step": 1427
},
{
"epoch": 0.31,
"grad_norm": 0.16723619401454926,
"learning_rate": 8.112770389539574e-06,
"loss": 0.5048,
"step": 1428
},
{
"epoch": 0.31,
"grad_norm": 0.15985050797462463,
"learning_rate": 8.11003913712509e-06,
"loss": 0.4759,
"step": 1429
},
{
"epoch": 0.31,
"grad_norm": 0.16711269319057465,
"learning_rate": 8.107306370261785e-06,
"loss": 0.5433,
"step": 1430
},
{
"epoch": 0.31,
"grad_norm": 0.15856465697288513,
"learning_rate": 8.104572090280397e-06,
"loss": 0.5132,
"step": 1431
},
{
"epoch": 0.31,
"grad_norm": 0.14167572557926178,
"learning_rate": 8.101836298512396e-06,
"loss": 0.4879,
"step": 1432
},
{
"epoch": 0.31,
"grad_norm": 0.17282311618328094,
"learning_rate": 8.099098996289986e-06,
"loss": 0.5943,
"step": 1433
},
{
"epoch": 0.31,
"grad_norm": 0.1634991616010666,
"learning_rate": 8.096360184946117e-06,
"loss": 0.5256,
"step": 1434
},
{
"epoch": 0.31,
"grad_norm": 0.17868229746818542,
"learning_rate": 8.093619865814461e-06,
"loss": 0.5314,
"step": 1435
},
{
"epoch": 0.31,
"grad_norm": 0.17916221916675568,
"learning_rate": 8.09087804022943e-06,
"loss": 0.5192,
"step": 1436
},
{
"epoch": 0.31,
"grad_norm": 0.15131542086601257,
"learning_rate": 8.088134709526174e-06,
"loss": 0.4965,
"step": 1437
},
{
"epoch": 0.31,
"grad_norm": 0.15476344525814056,
"learning_rate": 8.085389875040566e-06,
"loss": 0.547,
"step": 1438
},
{
"epoch": 0.31,
"grad_norm": 0.18421463668346405,
"learning_rate": 8.082643538109217e-06,
"loss": 0.5478,
"step": 1439
},
{
"epoch": 0.31,
"grad_norm": 0.1662701666355133,
"learning_rate": 8.079895700069473e-06,
"loss": 0.5092,
"step": 1440
},
{
"epoch": 0.31,
"grad_norm": 0.18112128973007202,
"learning_rate": 8.077146362259405e-06,
"loss": 0.5242,
"step": 1441
},
{
"epoch": 0.31,
"grad_norm": 0.13690048456192017,
"learning_rate": 8.074395526017816e-06,
"loss": 0.5172,
"step": 1442
},
{
"epoch": 0.31,
"grad_norm": 0.16095203161239624,
"learning_rate": 8.07164319268424e-06,
"loss": 0.5465,
"step": 1443
},
{
"epoch": 0.31,
"grad_norm": 0.13967949151992798,
"learning_rate": 8.06888936359894e-06,
"loss": 0.5786,
"step": 1444
},
{
"epoch": 0.31,
"grad_norm": 0.23251961171627045,
"learning_rate": 8.066134040102904e-06,
"loss": 0.5086,
"step": 1445
},
{
"epoch": 0.31,
"grad_norm": 0.20811443030834198,
"learning_rate": 8.063377223537853e-06,
"loss": 0.5101,
"step": 1446
},
{
"epoch": 0.31,
"grad_norm": 0.1625215709209442,
"learning_rate": 8.060618915246233e-06,
"loss": 0.5268,
"step": 1447
},
{
"epoch": 0.31,
"grad_norm": 0.1501462310552597,
"learning_rate": 8.057859116571213e-06,
"loss": 0.547,
"step": 1448
},
{
"epoch": 0.31,
"grad_norm": 0.16021014750003815,
"learning_rate": 8.055097828856691e-06,
"loss": 0.5311,
"step": 1449
},
{
"epoch": 0.31,
"grad_norm": 0.20781485736370087,
"learning_rate": 8.05233505344729e-06,
"loss": 0.5188,
"step": 1450
},
{
"epoch": 0.31,
"grad_norm": 0.3020351231098175,
"learning_rate": 8.049570791688356e-06,
"loss": 0.5023,
"step": 1451
},
{
"epoch": 0.31,
"grad_norm": 0.1566857397556305,
"learning_rate": 8.046805044925964e-06,
"loss": 0.48,
"step": 1452
},
{
"epoch": 0.31,
"grad_norm": 0.1672096997499466,
"learning_rate": 8.044037814506905e-06,
"loss": 0.5301,
"step": 1453
},
{
"epoch": 0.31,
"grad_norm": 0.19419468939304352,
"learning_rate": 8.041269101778694e-06,
"loss": 0.5226,
"step": 1454
},
{
"epoch": 0.31,
"grad_norm": 0.16195285320281982,
"learning_rate": 8.03849890808957e-06,
"loss": 0.5223,
"step": 1455
},
{
"epoch": 0.31,
"grad_norm": 0.14367403090000153,
"learning_rate": 8.035727234788496e-06,
"loss": 0.5274,
"step": 1456
},
{
"epoch": 0.31,
"grad_norm": 0.1967507302761078,
"learning_rate": 8.032954083225146e-06,
"loss": 0.4899,
"step": 1457
},
{
"epoch": 0.31,
"grad_norm": 0.23297229409217834,
"learning_rate": 8.030179454749925e-06,
"loss": 0.5186,
"step": 1458
},
{
"epoch": 0.31,
"grad_norm": 0.16745884716510773,
"learning_rate": 8.027403350713948e-06,
"loss": 0.492,
"step": 1459
},
{
"epoch": 0.31,
"grad_norm": 0.13999496400356293,
"learning_rate": 8.024625772469055e-06,
"loss": 0.5221,
"step": 1460
},
{
"epoch": 0.31,
"grad_norm": 0.140817791223526,
"learning_rate": 8.0218467213678e-06,
"loss": 0.5128,
"step": 1461
},
{
"epoch": 0.31,
"grad_norm": 0.15968118607997894,
"learning_rate": 8.019066198763458e-06,
"loss": 0.525,
"step": 1462
},
{
"epoch": 0.32,
"grad_norm": 0.13812531530857086,
"learning_rate": 8.016284206010015e-06,
"loss": 0.4477,
"step": 1463
},
{
"epoch": 0.32,
"grad_norm": 0.16426512598991394,
"learning_rate": 8.013500744462177e-06,
"loss": 0.4974,
"step": 1464
},
{
"epoch": 0.32,
"grad_norm": 0.15231406688690186,
"learning_rate": 8.010715815475365e-06,
"loss": 0.5289,
"step": 1465
},
{
"epoch": 0.32,
"grad_norm": 0.1844695508480072,
"learning_rate": 8.007929420405714e-06,
"loss": 0.5201,
"step": 1466
},
{
"epoch": 0.32,
"grad_norm": 0.17498986423015594,
"learning_rate": 8.005141560610072e-06,
"loss": 0.5619,
"step": 1467
},
{
"epoch": 0.32,
"grad_norm": 0.16564463078975677,
"learning_rate": 8.002352237446e-06,
"loss": 0.5398,
"step": 1468
},
{
"epoch": 0.32,
"grad_norm": 0.15143102407455444,
"learning_rate": 7.999561452271776e-06,
"loss": 0.5038,
"step": 1469
},
{
"epoch": 0.32,
"grad_norm": 0.17521046102046967,
"learning_rate": 7.996769206446383e-06,
"loss": 0.4634,
"step": 1470
},
{
"epoch": 0.32,
"grad_norm": 0.16226552426815033,
"learning_rate": 7.993975501329518e-06,
"loss": 0.5735,
"step": 1471
},
{
"epoch": 0.32,
"grad_norm": 0.2068720906972885,
"learning_rate": 7.991180338281594e-06,
"loss": 0.5329,
"step": 1472
},
{
"epoch": 0.32,
"grad_norm": 0.2290961742401123,
"learning_rate": 7.988383718663727e-06,
"loss": 0.5203,
"step": 1473
},
{
"epoch": 0.32,
"grad_norm": 0.14001663029193878,
"learning_rate": 7.985585643837743e-06,
"loss": 0.4844,
"step": 1474
},
{
"epoch": 0.32,
"grad_norm": 0.15565429627895355,
"learning_rate": 7.982786115166182e-06,
"loss": 0.5158,
"step": 1475
},
{
"epoch": 0.32,
"grad_norm": 0.12718220055103302,
"learning_rate": 7.979985134012285e-06,
"loss": 0.5256,
"step": 1476
},
{
"epoch": 0.32,
"grad_norm": 0.1732247918844223,
"learning_rate": 7.977182701740003e-06,
"loss": 0.5447,
"step": 1477
},
{
"epoch": 0.32,
"grad_norm": 0.16792930662631989,
"learning_rate": 7.974378819713998e-06,
"loss": 0.5415,
"step": 1478
},
{
"epoch": 0.32,
"grad_norm": 0.1823003590106964,
"learning_rate": 7.97157348929963e-06,
"loss": 0.5089,
"step": 1479
},
{
"epoch": 0.32,
"grad_norm": 0.1478123515844345,
"learning_rate": 7.968766711862971e-06,
"loss": 0.5763,
"step": 1480
},
{
"epoch": 0.32,
"grad_norm": 0.16354763507843018,
"learning_rate": 7.965958488770796e-06,
"loss": 0.5476,
"step": 1481
},
{
"epoch": 0.32,
"grad_norm": 0.13449835777282715,
"learning_rate": 7.963148821390578e-06,
"loss": 0.5205,
"step": 1482
},
{
"epoch": 0.32,
"grad_norm": 0.17802083492279053,
"learning_rate": 7.960337711090504e-06,
"loss": 0.5239,
"step": 1483
},
{
"epoch": 0.32,
"grad_norm": 0.20004011690616608,
"learning_rate": 7.957525159239454e-06,
"loss": 0.5291,
"step": 1484
},
{
"epoch": 0.32,
"grad_norm": 0.17748400568962097,
"learning_rate": 7.954711167207016e-06,
"loss": 0.4913,
"step": 1485
},
{
"epoch": 0.32,
"grad_norm": 0.22476144134998322,
"learning_rate": 7.951895736363477e-06,
"loss": 0.4939,
"step": 1486
},
{
"epoch": 0.32,
"grad_norm": 0.16127091646194458,
"learning_rate": 7.949078868079825e-06,
"loss": 0.5272,
"step": 1487
},
{
"epoch": 0.32,
"grad_norm": 0.18299731612205505,
"learning_rate": 7.946260563727746e-06,
"loss": 0.5951,
"step": 1488
},
{
"epoch": 0.32,
"grad_norm": 0.13896289467811584,
"learning_rate": 7.94344082467963e-06,
"loss": 0.5591,
"step": 1489
},
{
"epoch": 0.32,
"grad_norm": 0.1735697239637375,
"learning_rate": 7.940619652308562e-06,
"loss": 0.5432,
"step": 1490
},
{
"epoch": 0.32,
"grad_norm": 0.16972100734710693,
"learning_rate": 7.937797047988322e-06,
"loss": 0.4821,
"step": 1491
},
{
"epoch": 0.32,
"grad_norm": 0.1734873354434967,
"learning_rate": 7.934973013093397e-06,
"loss": 0.4922,
"step": 1492
},
{
"epoch": 0.32,
"grad_norm": 0.16801413893699646,
"learning_rate": 7.932147548998958e-06,
"loss": 0.5599,
"step": 1493
},
{
"epoch": 0.32,
"grad_norm": 0.12655183672904968,
"learning_rate": 7.929320657080886e-06,
"loss": 0.5432,
"step": 1494
},
{
"epoch": 0.32,
"grad_norm": 0.2155943512916565,
"learning_rate": 7.926492338715746e-06,
"loss": 0.5351,
"step": 1495
},
{
"epoch": 0.32,
"grad_norm": 0.1321111023426056,
"learning_rate": 7.923662595280799e-06,
"loss": 0.5267,
"step": 1496
},
{
"epoch": 0.32,
"grad_norm": 0.19633205235004425,
"learning_rate": 7.920831428154008e-06,
"loss": 0.5296,
"step": 1497
},
{
"epoch": 0.32,
"grad_norm": 0.19406452775001526,
"learning_rate": 7.917998838714019e-06,
"loss": 0.569,
"step": 1498
},
{
"epoch": 0.32,
"grad_norm": 0.17301122844219208,
"learning_rate": 7.915164828340179e-06,
"loss": 0.5303,
"step": 1499
},
{
"epoch": 0.32,
"grad_norm": 0.14050279557704926,
"learning_rate": 7.91232939841252e-06,
"loss": 0.5045,
"step": 1500
},
{
"epoch": 0.32,
"grad_norm": 0.13988257944583893,
"learning_rate": 7.909492550311769e-06,
"loss": 0.4965,
"step": 1501
},
{
"epoch": 0.32,
"grad_norm": 0.13999608159065247,
"learning_rate": 7.906654285419347e-06,
"loss": 0.5337,
"step": 1502
},
{
"epoch": 0.32,
"grad_norm": 0.18495085835456848,
"learning_rate": 7.903814605117355e-06,
"loss": 0.5266,
"step": 1503
},
{
"epoch": 0.32,
"grad_norm": 0.131727397441864,
"learning_rate": 7.900973510788595e-06,
"loss": 0.5131,
"step": 1504
},
{
"epoch": 0.32,
"grad_norm": 0.13659153878688812,
"learning_rate": 7.898131003816547e-06,
"loss": 0.4934,
"step": 1505
},
{
"epoch": 0.32,
"grad_norm": 0.22903259098529816,
"learning_rate": 7.895287085585386e-06,
"loss": 0.5258,
"step": 1506
},
{
"epoch": 0.32,
"grad_norm": 0.23151510953903198,
"learning_rate": 7.892441757479974e-06,
"loss": 0.5321,
"step": 1507
},
{
"epoch": 0.32,
"grad_norm": 0.18955311179161072,
"learning_rate": 7.889595020885853e-06,
"loss": 0.4939,
"step": 1508
},
{
"epoch": 0.33,
"grad_norm": 0.14848068356513977,
"learning_rate": 7.88674687718926e-06,
"loss": 0.4916,
"step": 1509
},
{
"epoch": 0.33,
"grad_norm": 0.13812664151191711,
"learning_rate": 7.883897327777108e-06,
"loss": 0.51,
"step": 1510
},
{
"epoch": 0.33,
"grad_norm": 0.14594610035419464,
"learning_rate": 7.881046374037002e-06,
"loss": 0.497,
"step": 1511
},
{
"epoch": 0.33,
"grad_norm": 0.18314702808856964,
"learning_rate": 7.878194017357229e-06,
"loss": 0.4968,
"step": 1512
},
{
"epoch": 0.33,
"grad_norm": 0.15771466493606567,
"learning_rate": 7.875340259126754e-06,
"loss": 0.5373,
"step": 1513
},
{
"epoch": 0.33,
"grad_norm": 0.15456095337867737,
"learning_rate": 7.87248510073523e-06,
"loss": 0.4797,
"step": 1514
},
{
"epoch": 0.33,
"grad_norm": 0.14819829165935516,
"learning_rate": 7.869628543572994e-06,
"loss": 0.4645,
"step": 1515
},
{
"epoch": 0.33,
"grad_norm": 0.16360363364219666,
"learning_rate": 7.866770589031057e-06,
"loss": 0.4941,
"step": 1516
},
{
"epoch": 0.33,
"grad_norm": 0.1475502848625183,
"learning_rate": 7.863911238501113e-06,
"loss": 0.5693,
"step": 1517
},
{
"epoch": 0.33,
"grad_norm": 0.17970135807991028,
"learning_rate": 7.86105049337554e-06,
"loss": 0.6145,
"step": 1518
},
{
"epoch": 0.33,
"grad_norm": 0.16100694239139557,
"learning_rate": 7.85818835504739e-06,
"loss": 0.4806,
"step": 1519
},
{
"epoch": 0.33,
"grad_norm": 0.18620309233665466,
"learning_rate": 7.855324824910395e-06,
"loss": 0.5659,
"step": 1520
},
{
"epoch": 0.33,
"grad_norm": 0.1660996675491333,
"learning_rate": 7.852459904358968e-06,
"loss": 0.5211,
"step": 1521
},
{
"epoch": 0.33,
"grad_norm": 0.18867598474025726,
"learning_rate": 7.849593594788192e-06,
"loss": 0.4975,
"step": 1522
},
{
"epoch": 0.33,
"grad_norm": 0.17060688138008118,
"learning_rate": 7.846725897593834e-06,
"loss": 0.527,
"step": 1523
},
{
"epoch": 0.33,
"grad_norm": 0.14144161343574524,
"learning_rate": 7.843856814172329e-06,
"loss": 0.478,
"step": 1524
},
{
"epoch": 0.33,
"grad_norm": 0.15240880846977234,
"learning_rate": 7.840986345920795e-06,
"loss": 0.4896,
"step": 1525
},
{
"epoch": 0.33,
"grad_norm": 0.1528806835412979,
"learning_rate": 7.83811449423702e-06,
"loss": 0.4968,
"step": 1526
},
{
"epoch": 0.33,
"grad_norm": 0.1606244146823883,
"learning_rate": 7.835241260519467e-06,
"loss": 0.4879,
"step": 1527
},
{
"epoch": 0.33,
"grad_norm": 0.14756283164024353,
"learning_rate": 7.832366646167268e-06,
"loss": 0.5135,
"step": 1528
},
{
"epoch": 0.33,
"grad_norm": 0.16397136449813843,
"learning_rate": 7.829490652580233e-06,
"loss": 0.5549,
"step": 1529
},
{
"epoch": 0.33,
"grad_norm": 0.1577044427394867,
"learning_rate": 7.82661328115884e-06,
"loss": 0.5037,
"step": 1530
},
{
"epoch": 0.33,
"grad_norm": 0.16425062716007233,
"learning_rate": 7.823734533304241e-06,
"loss": 0.5245,
"step": 1531
},
{
"epoch": 0.33,
"grad_norm": 0.18981023132801056,
"learning_rate": 7.820854410418255e-06,
"loss": 0.5009,
"step": 1532
},
{
"epoch": 0.33,
"grad_norm": 0.14500872790813446,
"learning_rate": 7.817972913903373e-06,
"loss": 0.4711,
"step": 1533
},
{
"epoch": 0.33,
"grad_norm": 0.2270984947681427,
"learning_rate": 7.815090045162752e-06,
"loss": 0.5454,
"step": 1534
},
{
"epoch": 0.33,
"grad_norm": 0.1595790833234787,
"learning_rate": 7.81220580560022e-06,
"loss": 0.5159,
"step": 1535
},
{
"epoch": 0.33,
"grad_norm": 0.18246832489967346,
"learning_rate": 7.809320196620272e-06,
"loss": 0.5324,
"step": 1536
},
{
"epoch": 0.33,
"grad_norm": 0.15763631463050842,
"learning_rate": 7.80643321962807e-06,
"loss": 0.5348,
"step": 1537
},
{
"epoch": 0.33,
"grad_norm": 0.1331566572189331,
"learning_rate": 7.80354487602944e-06,
"loss": 0.4746,
"step": 1538
},
{
"epoch": 0.33,
"grad_norm": 0.17700472474098206,
"learning_rate": 7.800655167230877e-06,
"loss": 0.5652,
"step": 1539
},
{
"epoch": 0.33,
"grad_norm": 0.15402348339557648,
"learning_rate": 7.797764094639537e-06,
"loss": 0.557,
"step": 1540
},
{
"epoch": 0.33,
"grad_norm": 0.17362762987613678,
"learning_rate": 7.794871659663242e-06,
"loss": 0.491,
"step": 1541
},
{
"epoch": 0.33,
"grad_norm": 0.14665651321411133,
"learning_rate": 7.79197786371048e-06,
"loss": 0.5373,
"step": 1542
},
{
"epoch": 0.33,
"grad_norm": 0.17219582200050354,
"learning_rate": 7.789082708190397e-06,
"loss": 0.4852,
"step": 1543
},
{
"epoch": 0.33,
"grad_norm": 0.15352313220500946,
"learning_rate": 7.786186194512802e-06,
"loss": 0.4926,
"step": 1544
},
{
"epoch": 0.33,
"grad_norm": 0.17823894321918488,
"learning_rate": 7.78328832408817e-06,
"loss": 0.5275,
"step": 1545
},
{
"epoch": 0.33,
"grad_norm": 0.20020678639411926,
"learning_rate": 7.780389098327629e-06,
"loss": 0.4786,
"step": 1546
},
{
"epoch": 0.33,
"grad_norm": 0.13879740238189697,
"learning_rate": 7.777488518642975e-06,
"loss": 0.5054,
"step": 1547
},
{
"epoch": 0.33,
"grad_norm": 0.1314191222190857,
"learning_rate": 7.774586586446658e-06,
"loss": 0.4901,
"step": 1548
},
{
"epoch": 0.33,
"grad_norm": 0.26172900199890137,
"learning_rate": 7.77168330315179e-06,
"loss": 0.5073,
"step": 1549
},
{
"epoch": 0.33,
"grad_norm": 0.15131932497024536,
"learning_rate": 7.768778670172135e-06,
"loss": 0.532,
"step": 1550
},
{
"epoch": 0.33,
"grad_norm": 0.14957192540168762,
"learning_rate": 7.76587268892212e-06,
"loss": 0.489,
"step": 1551
},
{
"epoch": 0.33,
"grad_norm": 0.15338850021362305,
"learning_rate": 7.762965360816828e-06,
"loss": 0.5161,
"step": 1552
},
{
"epoch": 0.33,
"grad_norm": 0.14951498806476593,
"learning_rate": 7.760056687271996e-06,
"loss": 0.545,
"step": 1553
},
{
"epoch": 0.33,
"grad_norm": 0.32918447256088257,
"learning_rate": 7.757146669704016e-06,
"loss": 0.5144,
"step": 1554
},
{
"epoch": 0.33,
"grad_norm": 0.1633896380662918,
"learning_rate": 7.754235309529939e-06,
"loss": 0.5305,
"step": 1555
},
{
"epoch": 0.34,
"grad_norm": 0.15538008511066437,
"learning_rate": 7.75132260816746e-06,
"loss": 0.5787,
"step": 1556
},
{
"epoch": 0.34,
"grad_norm": 0.16210249066352844,
"learning_rate": 7.748408567034938e-06,
"loss": 0.516,
"step": 1557
},
{
"epoch": 0.34,
"grad_norm": 0.140504851937294,
"learning_rate": 7.745493187551378e-06,
"loss": 0.5344,
"step": 1558
},
{
"epoch": 0.34,
"grad_norm": 0.1350797414779663,
"learning_rate": 7.74257647113644e-06,
"loss": 0.5773,
"step": 1559
},
{
"epoch": 0.34,
"grad_norm": 0.16812683641910553,
"learning_rate": 7.739658419210429e-06,
"loss": 0.4808,
"step": 1560
},
{
"epoch": 0.34,
"grad_norm": 0.15915554761886597,
"learning_rate": 7.73673903319431e-06,
"loss": 0.51,
"step": 1561
},
{
"epoch": 0.34,
"grad_norm": 0.14357538521289825,
"learning_rate": 7.733818314509689e-06,
"loss": 0.4821,
"step": 1562
},
{
"epoch": 0.34,
"grad_norm": 0.1362561285495758,
"learning_rate": 7.730896264578825e-06,
"loss": 0.5051,
"step": 1563
},
{
"epoch": 0.34,
"grad_norm": 0.29245832562446594,
"learning_rate": 7.727972884824625e-06,
"loss": 0.5387,
"step": 1564
},
{
"epoch": 0.34,
"grad_norm": 0.1896662563085556,
"learning_rate": 7.725048176670643e-06,
"loss": 0.5269,
"step": 1565
},
{
"epoch": 0.34,
"grad_norm": 0.16521599888801575,
"learning_rate": 7.72212214154108e-06,
"loss": 0.5207,
"step": 1566
},
{
"epoch": 0.34,
"grad_norm": 0.1532319337129593,
"learning_rate": 7.719194780860783e-06,
"loss": 0.4951,
"step": 1567
},
{
"epoch": 0.34,
"grad_norm": 0.15770648419857025,
"learning_rate": 7.716266096055243e-06,
"loss": 0.5328,
"step": 1568
},
{
"epoch": 0.34,
"grad_norm": 0.13383062183856964,
"learning_rate": 7.713336088550601e-06,
"loss": 0.5463,
"step": 1569
},
{
"epoch": 0.34,
"grad_norm": 0.2122948169708252,
"learning_rate": 7.710404759773637e-06,
"loss": 0.5193,
"step": 1570
},
{
"epoch": 0.34,
"grad_norm": 0.1524578481912613,
"learning_rate": 7.707472111151775e-06,
"loss": 0.5058,
"step": 1571
},
{
"epoch": 0.34,
"grad_norm": 0.1887030303478241,
"learning_rate": 7.704538144113082e-06,
"loss": 0.515,
"step": 1572
},
{
"epoch": 0.34,
"grad_norm": 0.18387439846992493,
"learning_rate": 7.70160286008627e-06,
"loss": 0.523,
"step": 1573
},
{
"epoch": 0.34,
"grad_norm": 0.1244322806596756,
"learning_rate": 7.698666260500688e-06,
"loss": 0.4878,
"step": 1574
},
{
"epoch": 0.34,
"grad_norm": 0.13694074749946594,
"learning_rate": 7.69572834678633e-06,
"loss": 0.4722,
"step": 1575
},
{
"epoch": 0.34,
"grad_norm": 0.17935697734355927,
"learning_rate": 7.692789120373824e-06,
"loss": 0.4532,
"step": 1576
},
{
"epoch": 0.34,
"grad_norm": 0.1903911679983139,
"learning_rate": 7.689848582694444e-06,
"loss": 0.5128,
"step": 1577
},
{
"epoch": 0.34,
"grad_norm": 0.15431609749794006,
"learning_rate": 7.686906735180099e-06,
"loss": 0.4882,
"step": 1578
},
{
"epoch": 0.34,
"grad_norm": 0.17097975313663483,
"learning_rate": 7.683963579263332e-06,
"loss": 0.5729,
"step": 1579
},
{
"epoch": 0.34,
"grad_norm": 0.14723485708236694,
"learning_rate": 7.681019116377331e-06,
"loss": 0.494,
"step": 1580
},
{
"epoch": 0.34,
"grad_norm": 0.17691069841384888,
"learning_rate": 7.678073347955918e-06,
"loss": 0.5062,
"step": 1581
},
{
"epoch": 0.34,
"grad_norm": 0.161320298910141,
"learning_rate": 7.675126275433545e-06,
"loss": 0.5685,
"step": 1582
},
{
"epoch": 0.34,
"grad_norm": 0.18011566996574402,
"learning_rate": 7.672177900245307e-06,
"loss": 0.5103,
"step": 1583
},
{
"epoch": 0.34,
"grad_norm": 0.16380946338176727,
"learning_rate": 7.669228223826926e-06,
"loss": 0.4897,
"step": 1584
},
{
"epoch": 0.34,
"grad_norm": 0.15541784465312958,
"learning_rate": 7.666277247614766e-06,
"loss": 0.4562,
"step": 1585
},
{
"epoch": 0.34,
"grad_norm": 0.21574871242046356,
"learning_rate": 7.663324973045818e-06,
"loss": 0.5683,
"step": 1586
},
{
"epoch": 0.34,
"grad_norm": 0.18054868280887604,
"learning_rate": 7.660371401557703e-06,
"loss": 0.5149,
"step": 1587
},
{
"epoch": 0.34,
"grad_norm": 0.1341419368982315,
"learning_rate": 7.657416534588683e-06,
"loss": 0.4946,
"step": 1588
},
{
"epoch": 0.34,
"grad_norm": 0.1958109736442566,
"learning_rate": 7.654460373577639e-06,
"loss": 0.5204,
"step": 1589
},
{
"epoch": 0.34,
"grad_norm": 0.13961777091026306,
"learning_rate": 7.651502919964092e-06,
"loss": 0.4753,
"step": 1590
},
{
"epoch": 0.34,
"grad_norm": 0.16249793767929077,
"learning_rate": 7.648544175188189e-06,
"loss": 0.5392,
"step": 1591
},
{
"epoch": 0.34,
"grad_norm": 0.17830121517181396,
"learning_rate": 7.645584140690702e-06,
"loss": 0.5414,
"step": 1592
},
{
"epoch": 0.34,
"grad_norm": 0.164913147687912,
"learning_rate": 7.642622817913036e-06,
"loss": 0.5127,
"step": 1593
},
{
"epoch": 0.34,
"grad_norm": 0.13776592910289764,
"learning_rate": 7.639660208297221e-06,
"loss": 0.4568,
"step": 1594
},
{
"epoch": 0.34,
"grad_norm": 0.4830784499645233,
"learning_rate": 7.636696313285917e-06,
"loss": 0.5153,
"step": 1595
},
{
"epoch": 0.34,
"grad_norm": 0.14156107604503632,
"learning_rate": 7.633731134322404e-06,
"loss": 0.5142,
"step": 1596
},
{
"epoch": 0.34,
"grad_norm": 0.1518123894929886,
"learning_rate": 7.630764672850593e-06,
"loss": 0.51,
"step": 1597
},
{
"epoch": 0.34,
"grad_norm": 0.17625145614147186,
"learning_rate": 7.6277969303150155e-06,
"loss": 0.495,
"step": 1598
},
{
"epoch": 0.34,
"grad_norm": 0.17110183835029602,
"learning_rate": 7.624827908160828e-06,
"loss": 0.5465,
"step": 1599
},
{
"epoch": 0.34,
"grad_norm": 0.18074309825897217,
"learning_rate": 7.6218576078338115e-06,
"loss": 0.519,
"step": 1600
},
{
"epoch": 0.34,
"grad_norm": 0.176472008228302,
"learning_rate": 7.618886030780366e-06,
"loss": 0.5301,
"step": 1601
},
{
"epoch": 0.35,
"grad_norm": 0.23984403908252716,
"learning_rate": 7.615913178447518e-06,
"loss": 0.5679,
"step": 1602
},
{
"epoch": 0.35,
"grad_norm": 0.16570177674293518,
"learning_rate": 7.612939052282913e-06,
"loss": 0.5353,
"step": 1603
},
{
"epoch": 0.35,
"grad_norm": 0.15504352748394012,
"learning_rate": 7.609963653734814e-06,
"loss": 0.4889,
"step": 1604
},
{
"epoch": 0.35,
"grad_norm": 0.12483610212802887,
"learning_rate": 7.606986984252107e-06,
"loss": 0.4901,
"step": 1605
},
{
"epoch": 0.35,
"grad_norm": 0.1474786102771759,
"learning_rate": 7.604009045284295e-06,
"loss": 0.5106,
"step": 1606
},
{
"epoch": 0.35,
"grad_norm": 0.1935417652130127,
"learning_rate": 7.601029838281503e-06,
"loss": 0.54,
"step": 1607
},
{
"epoch": 0.35,
"grad_norm": 0.15936410427093506,
"learning_rate": 7.598049364694466e-06,
"loss": 0.5259,
"step": 1608
},
{
"epoch": 0.35,
"grad_norm": 0.23374778032302856,
"learning_rate": 7.595067625974544e-06,
"loss": 0.4745,
"step": 1609
},
{
"epoch": 0.35,
"grad_norm": 0.1541801393032074,
"learning_rate": 7.592084623573708e-06,
"loss": 0.5009,
"step": 1610
},
{
"epoch": 0.35,
"grad_norm": 0.1573501080274582,
"learning_rate": 7.589100358944546e-06,
"loss": 0.5054,
"step": 1611
},
{
"epoch": 0.35,
"grad_norm": 0.14179089665412903,
"learning_rate": 7.586114833540257e-06,
"loss": 0.4971,
"step": 1612
},
{
"epoch": 0.35,
"grad_norm": 0.12740643322467804,
"learning_rate": 7.583128048814663e-06,
"loss": 0.5311,
"step": 1613
},
{
"epoch": 0.35,
"grad_norm": 0.18302515149116516,
"learning_rate": 7.58014000622219e-06,
"loss": 0.5443,
"step": 1614
},
{
"epoch": 0.35,
"grad_norm": 0.22869239747524261,
"learning_rate": 7.577150707217878e-06,
"loss": 0.5488,
"step": 1615
},
{
"epoch": 0.35,
"grad_norm": 0.11746443063020706,
"learning_rate": 7.574160153257386e-06,
"loss": 0.5052,
"step": 1616
},
{
"epoch": 0.35,
"grad_norm": 0.15382401645183563,
"learning_rate": 7.571168345796975e-06,
"loss": 0.5468,
"step": 1617
},
{
"epoch": 0.35,
"grad_norm": 0.18465621769428253,
"learning_rate": 7.568175286293522e-06,
"loss": 0.557,
"step": 1618
},
{
"epoch": 0.35,
"grad_norm": 0.14507010579109192,
"learning_rate": 7.5651809762045115e-06,
"loss": 0.4686,
"step": 1619
},
{
"epoch": 0.35,
"grad_norm": 0.17526701092720032,
"learning_rate": 7.562185416988039e-06,
"loss": 0.5065,
"step": 1620
},
{
"epoch": 0.35,
"grad_norm": 0.16445392370224,
"learning_rate": 7.559188610102803e-06,
"loss": 0.4226,
"step": 1621
},
{
"epoch": 0.35,
"grad_norm": 0.13059720396995544,
"learning_rate": 7.556190557008116e-06,
"loss": 0.4899,
"step": 1622
},
{
"epoch": 0.35,
"grad_norm": 0.19847136735916138,
"learning_rate": 7.553191259163896e-06,
"loss": 0.5169,
"step": 1623
},
{
"epoch": 0.35,
"grad_norm": 0.1679173707962036,
"learning_rate": 7.550190718030663e-06,
"loss": 0.5012,
"step": 1624
},
{
"epoch": 0.35,
"grad_norm": 0.15986262261867523,
"learning_rate": 7.547188935069547e-06,
"loss": 0.5436,
"step": 1625
},
{
"epoch": 0.35,
"grad_norm": 0.13230155408382416,
"learning_rate": 7.54418591174228e-06,
"loss": 0.5307,
"step": 1626
},
{
"epoch": 0.35,
"grad_norm": 0.13571912050247192,
"learning_rate": 7.5411816495111985e-06,
"loss": 0.5169,
"step": 1627
},
{
"epoch": 0.35,
"grad_norm": 0.17367611825466156,
"learning_rate": 7.5381761498392435e-06,
"loss": 0.5677,
"step": 1628
},
{
"epoch": 0.35,
"grad_norm": 0.1747978776693344,
"learning_rate": 7.535169414189959e-06,
"loss": 0.5706,
"step": 1629
},
{
"epoch": 0.35,
"grad_norm": 0.11080675572156906,
"learning_rate": 7.532161444027488e-06,
"loss": 0.4933,
"step": 1630
},
{
"epoch": 0.35,
"grad_norm": 0.1479070633649826,
"learning_rate": 7.529152240816577e-06,
"loss": 0.4794,
"step": 1631
},
{
"epoch": 0.35,
"grad_norm": 0.12181144952774048,
"learning_rate": 7.526141806022571e-06,
"loss": 0.5346,
"step": 1632
},
{
"epoch": 0.35,
"grad_norm": 0.18355728685855865,
"learning_rate": 7.523130141111419e-06,
"loss": 0.5696,
"step": 1633
},
{
"epoch": 0.35,
"grad_norm": 0.12792839109897614,
"learning_rate": 7.520117247549661e-06,
"loss": 0.5148,
"step": 1634
},
{
"epoch": 0.35,
"grad_norm": 0.17084498703479767,
"learning_rate": 7.517103126804446e-06,
"loss": 0.5362,
"step": 1635
},
{
"epoch": 0.35,
"grad_norm": 0.1391141563653946,
"learning_rate": 7.514087780343511e-06,
"loss": 0.4839,
"step": 1636
},
{
"epoch": 0.35,
"grad_norm": 0.13675713539123535,
"learning_rate": 7.511071209635197e-06,
"loss": 0.5153,
"step": 1637
},
{
"epoch": 0.35,
"grad_norm": 0.13880731165409088,
"learning_rate": 7.508053416148433e-06,
"loss": 0.5117,
"step": 1638
},
{
"epoch": 0.35,
"grad_norm": 0.11620379984378815,
"learning_rate": 7.5050344013527535e-06,
"loss": 0.5146,
"step": 1639
},
{
"epoch": 0.35,
"grad_norm": 0.1520024538040161,
"learning_rate": 7.502014166718279e-06,
"loss": 0.5332,
"step": 1640
},
{
"epoch": 0.35,
"grad_norm": 0.16113972663879395,
"learning_rate": 7.49899271371573e-06,
"loss": 0.4881,
"step": 1641
},
{
"epoch": 0.35,
"grad_norm": 0.177647203207016,
"learning_rate": 7.495970043816416e-06,
"loss": 0.506,
"step": 1642
},
{
"epoch": 0.35,
"grad_norm": 0.20048052072525024,
"learning_rate": 7.492946158492243e-06,
"loss": 0.5128,
"step": 1643
},
{
"epoch": 0.35,
"grad_norm": 0.18544965982437134,
"learning_rate": 7.489921059215703e-06,
"loss": 0.4755,
"step": 1644
},
{
"epoch": 0.35,
"grad_norm": 0.15983660519123077,
"learning_rate": 7.486894747459887e-06,
"loss": 0.5021,
"step": 1645
},
{
"epoch": 0.35,
"grad_norm": 0.13609494268894196,
"learning_rate": 7.483867224698471e-06,
"loss": 0.5392,
"step": 1646
},
{
"epoch": 0.35,
"grad_norm": 0.15707872807979584,
"learning_rate": 7.480838492405722e-06,
"loss": 0.5503,
"step": 1647
},
{
"epoch": 0.36,
"grad_norm": 0.14846757054328918,
"learning_rate": 7.477808552056496e-06,
"loss": 0.5162,
"step": 1648
},
{
"epoch": 0.36,
"grad_norm": 0.20370322465896606,
"learning_rate": 7.474777405126236e-06,
"loss": 0.5291,
"step": 1649
},
{
"epoch": 0.36,
"grad_norm": 0.19087088108062744,
"learning_rate": 7.471745053090976e-06,
"loss": 0.5647,
"step": 1650
},
{
"epoch": 0.36,
"grad_norm": 0.1674560159444809,
"learning_rate": 7.468711497427335e-06,
"loss": 0.502,
"step": 1651
},
{
"epoch": 0.36,
"grad_norm": 0.1854984611272812,
"learning_rate": 7.465676739612514e-06,
"loss": 0.5304,
"step": 1652
},
{
"epoch": 0.36,
"grad_norm": 0.17334036529064178,
"learning_rate": 7.462640781124309e-06,
"loss": 0.5476,
"step": 1653
},
{
"epoch": 0.36,
"grad_norm": 0.1636764258146286,
"learning_rate": 7.45960362344109e-06,
"loss": 0.5359,
"step": 1654
},
{
"epoch": 0.36,
"grad_norm": 0.16120000183582306,
"learning_rate": 7.456565268041815e-06,
"loss": 0.5591,
"step": 1655
},
{
"epoch": 0.36,
"grad_norm": 0.16681008040905,
"learning_rate": 7.4535257164060324e-06,
"loss": 0.4933,
"step": 1656
},
{
"epoch": 0.36,
"grad_norm": 0.15936830639839172,
"learning_rate": 7.450484970013863e-06,
"loss": 0.4903,
"step": 1657
},
{
"epoch": 0.36,
"grad_norm": 0.1579248011112213,
"learning_rate": 7.447443030346011e-06,
"loss": 0.5368,
"step": 1658
},
{
"epoch": 0.36,
"grad_norm": 0.17494046688079834,
"learning_rate": 7.444399898883768e-06,
"loss": 0.4972,
"step": 1659
},
{
"epoch": 0.36,
"grad_norm": 0.15343308448791504,
"learning_rate": 7.441355577108998e-06,
"loss": 0.485,
"step": 1660
},
{
"epoch": 0.36,
"grad_norm": 0.24387070536613464,
"learning_rate": 7.438310066504152e-06,
"loss": 0.5527,
"step": 1661
},
{
"epoch": 0.36,
"grad_norm": 0.27083417773246765,
"learning_rate": 7.4352633685522535e-06,
"loss": 0.4657,
"step": 1662
},
{
"epoch": 0.36,
"grad_norm": 0.20291651785373688,
"learning_rate": 7.432215484736909e-06,
"loss": 0.4805,
"step": 1663
},
{
"epoch": 0.36,
"grad_norm": 0.17441540956497192,
"learning_rate": 7.4291664165422985e-06,
"loss": 0.5157,
"step": 1664
},
{
"epoch": 0.36,
"grad_norm": 0.21364037692546844,
"learning_rate": 7.426116165453181e-06,
"loss": 0.5072,
"step": 1665
},
{
"epoch": 0.36,
"grad_norm": 0.16811180114746094,
"learning_rate": 7.423064732954895e-06,
"loss": 0.4577,
"step": 1666
},
{
"epoch": 0.36,
"grad_norm": 0.2634996473789215,
"learning_rate": 7.420012120533346e-06,
"loss": 0.5387,
"step": 1667
},
{
"epoch": 0.36,
"grad_norm": 0.15785469114780426,
"learning_rate": 7.4169583296750194e-06,
"loss": 0.5052,
"step": 1668
},
{
"epoch": 0.36,
"grad_norm": 0.18810074031352997,
"learning_rate": 7.4139033618669764e-06,
"loss": 0.5234,
"step": 1669
},
{
"epoch": 0.36,
"grad_norm": 0.14630138874053955,
"learning_rate": 7.410847218596846e-06,
"loss": 0.5155,
"step": 1670
},
{
"epoch": 0.36,
"grad_norm": 0.18249250948429108,
"learning_rate": 7.407789901352831e-06,
"loss": 0.5351,
"step": 1671
},
{
"epoch": 0.36,
"grad_norm": 0.13652457296848297,
"learning_rate": 7.40473141162371e-06,
"loss": 0.4474,
"step": 1672
},
{
"epoch": 0.36,
"grad_norm": 0.18352244794368744,
"learning_rate": 7.401671750898829e-06,
"loss": 0.4628,
"step": 1673
},
{
"epoch": 0.36,
"grad_norm": 0.16410337388515472,
"learning_rate": 7.398610920668102e-06,
"loss": 0.5673,
"step": 1674
},
{
"epoch": 0.36,
"grad_norm": 0.14850519597530365,
"learning_rate": 7.39554892242202e-06,
"loss": 0.48,
"step": 1675
},
{
"epoch": 0.36,
"grad_norm": 0.1457439661026001,
"learning_rate": 7.392485757651634e-06,
"loss": 0.5061,
"step": 1676
},
{
"epoch": 0.36,
"grad_norm": 0.15839837491512299,
"learning_rate": 7.3894214278485685e-06,
"loss": 0.5482,
"step": 1677
},
{
"epoch": 0.36,
"grad_norm": 0.1379930078983307,
"learning_rate": 7.386355934505015e-06,
"loss": 0.5207,
"step": 1678
},
{
"epoch": 0.36,
"grad_norm": 0.2140752226114273,
"learning_rate": 7.38328927911373e-06,
"loss": 0.5709,
"step": 1679
},
{
"epoch": 0.36,
"grad_norm": 0.16319052875041962,
"learning_rate": 7.380221463168036e-06,
"loss": 0.5182,
"step": 1680
},
{
"epoch": 0.36,
"grad_norm": 0.12774449586868286,
"learning_rate": 7.3771524881618204e-06,
"loss": 0.5274,
"step": 1681
},
{
"epoch": 0.36,
"grad_norm": 0.13371047377586365,
"learning_rate": 7.374082355589536e-06,
"loss": 0.4983,
"step": 1682
},
{
"epoch": 0.36,
"grad_norm": 0.13684460520744324,
"learning_rate": 7.371011066946199e-06,
"loss": 0.5395,
"step": 1683
},
{
"epoch": 0.36,
"grad_norm": 0.16260729730129242,
"learning_rate": 7.367938623727389e-06,
"loss": 0.4927,
"step": 1684
},
{
"epoch": 0.36,
"grad_norm": 0.1580437868833542,
"learning_rate": 7.364865027429247e-06,
"loss": 0.5391,
"step": 1685
},
{
"epoch": 0.36,
"grad_norm": 0.41100969910621643,
"learning_rate": 7.361790279548476e-06,
"loss": 0.4922,
"step": 1686
},
{
"epoch": 0.36,
"grad_norm": 0.16328592598438263,
"learning_rate": 7.358714381582339e-06,
"loss": 0.5809,
"step": 1687
},
{
"epoch": 0.36,
"grad_norm": 0.16407454013824463,
"learning_rate": 7.35563733502866e-06,
"loss": 0.5317,
"step": 1688
},
{
"epoch": 0.36,
"grad_norm": 0.16385860741138458,
"learning_rate": 7.352559141385823e-06,
"loss": 0.5182,
"step": 1689
},
{
"epoch": 0.36,
"grad_norm": 0.1773800253868103,
"learning_rate": 7.3494798021527665e-06,
"loss": 0.4972,
"step": 1690
},
{
"epoch": 0.36,
"grad_norm": 0.14111146330833435,
"learning_rate": 7.346399318828994e-06,
"loss": 0.485,
"step": 1691
},
{
"epoch": 0.36,
"grad_norm": 0.18736319243907928,
"learning_rate": 7.3433176929145574e-06,
"loss": 0.532,
"step": 1692
},
{
"epoch": 0.36,
"grad_norm": 0.1659240871667862,
"learning_rate": 7.3402349259100725e-06,
"loss": 0.4878,
"step": 1693
},
{
"epoch": 0.36,
"grad_norm": 0.13603948056697845,
"learning_rate": 7.337151019316708e-06,
"loss": 0.5024,
"step": 1694
},
{
"epoch": 0.37,
"grad_norm": 0.14938659965991974,
"learning_rate": 7.334065974636186e-06,
"loss": 0.4882,
"step": 1695
},
{
"epoch": 0.37,
"grad_norm": 0.15664424002170563,
"learning_rate": 7.330979793370784e-06,
"loss": 0.4855,
"step": 1696
},
{
"epoch": 0.37,
"grad_norm": 0.15226437151432037,
"learning_rate": 7.327892477023335e-06,
"loss": 0.5258,
"step": 1697
},
{
"epoch": 0.37,
"grad_norm": 0.20304326713085175,
"learning_rate": 7.324804027097221e-06,
"loss": 0.5325,
"step": 1698
},
{
"epoch": 0.37,
"grad_norm": 0.14442868530750275,
"learning_rate": 7.3217144450963774e-06,
"loss": 0.4676,
"step": 1699
},
{
"epoch": 0.37,
"grad_norm": 0.14504297077655792,
"learning_rate": 7.318623732525294e-06,
"loss": 0.523,
"step": 1700
},
{
"epoch": 0.37,
"grad_norm": 0.13879434764385223,
"learning_rate": 7.315531890889007e-06,
"loss": 0.5121,
"step": 1701
},
{
"epoch": 0.37,
"grad_norm": 0.16492860019207,
"learning_rate": 7.312438921693101e-06,
"loss": 0.508,
"step": 1702
},
{
"epoch": 0.37,
"grad_norm": 0.13094115257263184,
"learning_rate": 7.309344826443718e-06,
"loss": 0.5123,
"step": 1703
},
{
"epoch": 0.37,
"grad_norm": 0.16071003675460815,
"learning_rate": 7.30624960664754e-06,
"loss": 0.5077,
"step": 1704
},
{
"epoch": 0.37,
"grad_norm": 0.1596524864435196,
"learning_rate": 7.3031532638117974e-06,
"loss": 0.5193,
"step": 1705
},
{
"epoch": 0.37,
"grad_norm": 0.15532274544239044,
"learning_rate": 7.300055799444273e-06,
"loss": 0.5651,
"step": 1706
},
{
"epoch": 0.37,
"grad_norm": 0.1956198513507843,
"learning_rate": 7.296957215053292e-06,
"loss": 0.5238,
"step": 1707
},
{
"epoch": 0.37,
"grad_norm": 0.17350712418556213,
"learning_rate": 7.293857512147723e-06,
"loss": 0.5064,
"step": 1708
},
{
"epoch": 0.37,
"grad_norm": 0.1837831437587738,
"learning_rate": 7.290756692236982e-06,
"loss": 0.5456,
"step": 1709
},
{
"epoch": 0.37,
"grad_norm": 0.20104587078094482,
"learning_rate": 7.287654756831031e-06,
"loss": 0.5701,
"step": 1710
},
{
"epoch": 0.37,
"grad_norm": 0.22067013382911682,
"learning_rate": 7.284551707440369e-06,
"loss": 0.4858,
"step": 1711
},
{
"epoch": 0.37,
"grad_norm": 0.17873504757881165,
"learning_rate": 7.2814475455760445e-06,
"loss": 0.5027,
"step": 1712
},
{
"epoch": 0.37,
"grad_norm": 0.16447962820529938,
"learning_rate": 7.278342272749643e-06,
"loss": 0.4854,
"step": 1713
},
{
"epoch": 0.37,
"grad_norm": 0.18496006727218628,
"learning_rate": 7.275235890473291e-06,
"loss": 0.5098,
"step": 1714
},
{
"epoch": 0.37,
"grad_norm": 0.20452427864074707,
"learning_rate": 7.272128400259658e-06,
"loss": 0.4419,
"step": 1715
},
{
"epoch": 0.37,
"grad_norm": 0.16275016963481903,
"learning_rate": 7.269019803621953e-06,
"loss": 0.535,
"step": 1716
},
{
"epoch": 0.37,
"grad_norm": 0.15786287188529968,
"learning_rate": 7.2659101020739195e-06,
"loss": 0.4883,
"step": 1717
},
{
"epoch": 0.37,
"grad_norm": 0.1765165776014328,
"learning_rate": 7.262799297129843e-06,
"loss": 0.5827,
"step": 1718
},
{
"epoch": 0.37,
"grad_norm": 0.12849071621894836,
"learning_rate": 7.259687390304546e-06,
"loss": 0.4739,
"step": 1719
},
{
"epoch": 0.37,
"grad_norm": 0.18336515128612518,
"learning_rate": 7.256574383113386e-06,
"loss": 0.5344,
"step": 1720
},
{
"epoch": 0.37,
"grad_norm": 0.14962013065814972,
"learning_rate": 7.253460277072258e-06,
"loss": 0.4984,
"step": 1721
},
{
"epoch": 0.37,
"grad_norm": 0.14270378649234772,
"learning_rate": 7.25034507369759e-06,
"loss": 0.491,
"step": 1722
},
{
"epoch": 0.37,
"grad_norm": 0.18622830510139465,
"learning_rate": 7.247228774506347e-06,
"loss": 0.5553,
"step": 1723
},
{
"epoch": 0.37,
"grad_norm": 0.16195961833000183,
"learning_rate": 7.244111381016024e-06,
"loss": 0.5497,
"step": 1724
},
{
"epoch": 0.37,
"grad_norm": 0.1802990436553955,
"learning_rate": 7.2409928947446526e-06,
"loss": 0.5371,
"step": 1725
},
{
"epoch": 0.37,
"grad_norm": 0.1768779754638672,
"learning_rate": 7.237873317210796e-06,
"loss": 0.5328,
"step": 1726
},
{
"epoch": 0.37,
"grad_norm": 0.15915416181087494,
"learning_rate": 7.234752649933545e-06,
"loss": 0.5206,
"step": 1727
},
{
"epoch": 0.37,
"grad_norm": 0.22865630686283112,
"learning_rate": 7.231630894432527e-06,
"loss": 0.5433,
"step": 1728
},
{
"epoch": 0.37,
"grad_norm": 0.13628236949443817,
"learning_rate": 7.228508052227895e-06,
"loss": 0.4809,
"step": 1729
},
{
"epoch": 0.37,
"grad_norm": 0.1925947070121765,
"learning_rate": 7.22538412484033e-06,
"loss": 0.5716,
"step": 1730
},
{
"epoch": 0.37,
"grad_norm": 0.14507855474948883,
"learning_rate": 7.2222591137910454e-06,
"loss": 0.5409,
"step": 1731
},
{
"epoch": 0.37,
"grad_norm": 0.1448884755373001,
"learning_rate": 7.219133020601783e-06,
"loss": 0.5184,
"step": 1732
},
{
"epoch": 0.37,
"grad_norm": 0.24185587465763092,
"learning_rate": 7.216005846794807e-06,
"loss": 0.5093,
"step": 1733
},
{
"epoch": 0.37,
"grad_norm": 0.14733339846134186,
"learning_rate": 7.2128775938929095e-06,
"loss": 0.5361,
"step": 1734
},
{
"epoch": 0.37,
"grad_norm": 0.1741349697113037,
"learning_rate": 7.209748263419409e-06,
"loss": 0.5405,
"step": 1735
},
{
"epoch": 0.37,
"grad_norm": 0.16004079580307007,
"learning_rate": 7.206617856898149e-06,
"loss": 0.5217,
"step": 1736
},
{
"epoch": 0.37,
"grad_norm": 0.16466408967971802,
"learning_rate": 7.203486375853496e-06,
"loss": 0.4928,
"step": 1737
},
{
"epoch": 0.37,
"grad_norm": 0.17737893760204315,
"learning_rate": 7.20035382181034e-06,
"loss": 0.5084,
"step": 1738
},
{
"epoch": 0.37,
"grad_norm": 0.33183491230010986,
"learning_rate": 7.197220196294094e-06,
"loss": 0.5574,
"step": 1739
},
{
"epoch": 0.37,
"grad_norm": 0.14042764902114868,
"learning_rate": 7.194085500830691e-06,
"loss": 0.5856,
"step": 1740
},
{
"epoch": 0.38,
"grad_norm": 0.17238366603851318,
"learning_rate": 7.190949736946587e-06,
"loss": 0.5456,
"step": 1741
},
{
"epoch": 0.38,
"grad_norm": 0.17922283709049225,
"learning_rate": 7.1878129061687595e-06,
"loss": 0.5223,
"step": 1742
},
{
"epoch": 0.38,
"grad_norm": 0.14631612598896027,
"learning_rate": 7.184675010024701e-06,
"loss": 0.5193,
"step": 1743
},
{
"epoch": 0.38,
"grad_norm": 0.1614404171705246,
"learning_rate": 7.181536050042427e-06,
"loss": 0.5372,
"step": 1744
},
{
"epoch": 0.38,
"grad_norm": 0.14466199278831482,
"learning_rate": 7.1783960277504685e-06,
"loss": 0.4811,
"step": 1745
},
{
"epoch": 0.38,
"grad_norm": 0.14429622888565063,
"learning_rate": 7.175254944677874e-06,
"loss": 0.4989,
"step": 1746
},
{
"epoch": 0.38,
"grad_norm": 0.1409209966659546,
"learning_rate": 7.172112802354212e-06,
"loss": 0.5104,
"step": 1747
},
{
"epoch": 0.38,
"grad_norm": 0.19490914046764374,
"learning_rate": 7.1689696023095625e-06,
"loss": 0.5189,
"step": 1748
},
{
"epoch": 0.38,
"grad_norm": 0.20314301550388336,
"learning_rate": 7.165825346074521e-06,
"loss": 0.5169,
"step": 1749
},
{
"epoch": 0.38,
"grad_norm": 0.1676884889602661,
"learning_rate": 7.162680035180201e-06,
"loss": 0.5543,
"step": 1750
},
{
"epoch": 0.38,
"grad_norm": 0.17340156435966492,
"learning_rate": 7.159533671158225e-06,
"loss": 0.5374,
"step": 1751
},
{
"epoch": 0.38,
"grad_norm": 0.1684662252664566,
"learning_rate": 7.156386255540732e-06,
"loss": 0.5167,
"step": 1752
},
{
"epoch": 0.38,
"grad_norm": 0.1722518354654312,
"learning_rate": 7.15323778986037e-06,
"loss": 0.5236,
"step": 1753
},
{
"epoch": 0.38,
"grad_norm": 0.1535075604915619,
"learning_rate": 7.150088275650302e-06,
"loss": 0.5676,
"step": 1754
},
{
"epoch": 0.38,
"grad_norm": 0.2000323235988617,
"learning_rate": 7.1469377144441954e-06,
"loss": 0.5039,
"step": 1755
},
{
"epoch": 0.38,
"grad_norm": 0.1701248437166214,
"learning_rate": 7.143786107776236e-06,
"loss": 0.5528,
"step": 1756
},
{
"epoch": 0.38,
"grad_norm": 0.15805946290493011,
"learning_rate": 7.140633457181112e-06,
"loss": 0.4744,
"step": 1757
},
{
"epoch": 0.38,
"grad_norm": 0.1715155392885208,
"learning_rate": 7.137479764194022e-06,
"loss": 0.5385,
"step": 1758
},
{
"epoch": 0.38,
"grad_norm": 0.20759384334087372,
"learning_rate": 7.134325030350672e-06,
"loss": 0.4994,
"step": 1759
},
{
"epoch": 0.38,
"grad_norm": 0.1527446210384369,
"learning_rate": 7.131169257187276e-06,
"loss": 0.5411,
"step": 1760
},
{
"epoch": 0.38,
"grad_norm": 0.15912318229675293,
"learning_rate": 7.128012446240552e-06,
"loss": 0.5674,
"step": 1761
},
{
"epoch": 0.38,
"grad_norm": 0.1656845211982727,
"learning_rate": 7.1248545990477256e-06,
"loss": 0.4999,
"step": 1762
},
{
"epoch": 0.38,
"grad_norm": 0.14019495248794556,
"learning_rate": 7.121695717146526e-06,
"loss": 0.5353,
"step": 1763
},
{
"epoch": 0.38,
"grad_norm": 0.17298150062561035,
"learning_rate": 7.1185358020751875e-06,
"loss": 0.5064,
"step": 1764
},
{
"epoch": 0.38,
"grad_norm": 0.14910168945789337,
"learning_rate": 7.1153748553724425e-06,
"loss": 0.5262,
"step": 1765
},
{
"epoch": 0.38,
"grad_norm": 0.20957139134407043,
"learning_rate": 7.112212878577533e-06,
"loss": 0.5084,
"step": 1766
},
{
"epoch": 0.38,
"grad_norm": 0.17487388849258423,
"learning_rate": 7.109049873230198e-06,
"loss": 0.5578,
"step": 1767
},
{
"epoch": 0.38,
"grad_norm": 0.20940136909484863,
"learning_rate": 7.1058858408706765e-06,
"loss": 0.5895,
"step": 1768
},
{
"epoch": 0.38,
"grad_norm": 0.23022903501987457,
"learning_rate": 7.1027207830397134e-06,
"loss": 0.5334,
"step": 1769
},
{
"epoch": 0.38,
"grad_norm": 0.15674887597560883,
"learning_rate": 7.099554701278547e-06,
"loss": 0.5144,
"step": 1770
},
{
"epoch": 0.38,
"grad_norm": 0.15679983794689178,
"learning_rate": 7.096387597128916e-06,
"loss": 0.5139,
"step": 1771
},
{
"epoch": 0.38,
"grad_norm": 0.19758965075016022,
"learning_rate": 7.093219472133059e-06,
"loss": 0.5184,
"step": 1772
},
{
"epoch": 0.38,
"grad_norm": 0.17212289571762085,
"learning_rate": 7.0900503278337074e-06,
"loss": 0.5164,
"step": 1773
},
{
"epoch": 0.38,
"grad_norm": 0.18704959750175476,
"learning_rate": 7.086880165774093e-06,
"loss": 0.5332,
"step": 1774
},
{
"epoch": 0.38,
"grad_norm": 0.1653163731098175,
"learning_rate": 7.083708987497943e-06,
"loss": 0.536,
"step": 1775
},
{
"epoch": 0.38,
"grad_norm": 0.1986512988805771,
"learning_rate": 7.080536794549477e-06,
"loss": 0.5382,
"step": 1776
},
{
"epoch": 0.38,
"grad_norm": 0.15724928677082062,
"learning_rate": 7.077363588473408e-06,
"loss": 0.5549,
"step": 1777
},
{
"epoch": 0.38,
"grad_norm": 0.14671437442302704,
"learning_rate": 7.0741893708149475e-06,
"loss": 0.5662,
"step": 1778
},
{
"epoch": 0.38,
"grad_norm": 0.15560339391231537,
"learning_rate": 7.071014143119796e-06,
"loss": 0.5198,
"step": 1779
},
{
"epoch": 0.38,
"grad_norm": 0.14752082526683807,
"learning_rate": 7.067837906934143e-06,
"loss": 0.5337,
"step": 1780
},
{
"epoch": 0.38,
"grad_norm": 0.13522642850875854,
"learning_rate": 7.064660663804677e-06,
"loss": 0.5066,
"step": 1781
},
{
"epoch": 0.38,
"grad_norm": 0.1374634951353073,
"learning_rate": 7.061482415278569e-06,
"loss": 0.4911,
"step": 1782
},
{
"epoch": 0.38,
"grad_norm": 0.18049356341362,
"learning_rate": 7.058303162903483e-06,
"loss": 0.5261,
"step": 1783
},
{
"epoch": 0.38,
"grad_norm": 0.17125682532787323,
"learning_rate": 7.055122908227571e-06,
"loss": 0.5311,
"step": 1784
},
{
"epoch": 0.38,
"grad_norm": 0.16370706260204315,
"learning_rate": 7.051941652799476e-06,
"loss": 0.4968,
"step": 1785
},
{
"epoch": 0.38,
"grad_norm": 0.1682046800851822,
"learning_rate": 7.0487593981683246e-06,
"loss": 0.4958,
"step": 1786
},
{
"epoch": 0.38,
"grad_norm": 0.1765281856060028,
"learning_rate": 7.04557614588373e-06,
"loss": 0.5139,
"step": 1787
},
{
"epoch": 0.39,
"grad_norm": 0.33266332745552063,
"learning_rate": 7.042391897495795e-06,
"loss": 0.5654,
"step": 1788
},
{
"epoch": 0.39,
"grad_norm": 0.1499028503894806,
"learning_rate": 7.039206654555103e-06,
"loss": 0.4745,
"step": 1789
},
{
"epoch": 0.39,
"grad_norm": 0.1392756998538971,
"learning_rate": 7.036020418612724e-06,
"loss": 0.5564,
"step": 1790
},
{
"epoch": 0.39,
"grad_norm": 0.1803901195526123,
"learning_rate": 7.032833191220213e-06,
"loss": 0.4915,
"step": 1791
},
{
"epoch": 0.39,
"grad_norm": 0.17533114552497864,
"learning_rate": 7.029644973929604e-06,
"loss": 0.4861,
"step": 1792
},
{
"epoch": 0.39,
"grad_norm": 0.1752566695213318,
"learning_rate": 7.026455768293416e-06,
"loss": 0.508,
"step": 1793
},
{
"epoch": 0.39,
"grad_norm": 0.14547456800937653,
"learning_rate": 7.023265575864648e-06,
"loss": 0.5137,
"step": 1794
},
{
"epoch": 0.39,
"grad_norm": 0.19993162155151367,
"learning_rate": 7.020074398196779e-06,
"loss": 0.5089,
"step": 1795
},
{
"epoch": 0.39,
"grad_norm": 0.28430238366127014,
"learning_rate": 7.016882236843769e-06,
"loss": 0.536,
"step": 1796
},
{
"epoch": 0.39,
"grad_norm": 0.16877298057079315,
"learning_rate": 7.013689093360059e-06,
"loss": 0.5131,
"step": 1797
},
{
"epoch": 0.39,
"grad_norm": 0.12015072256326675,
"learning_rate": 7.0104949693005645e-06,
"loss": 0.4872,
"step": 1798
},
{
"epoch": 0.39,
"grad_norm": 0.154635950922966,
"learning_rate": 7.0072998662206775e-06,
"loss": 0.5255,
"step": 1799
},
{
"epoch": 0.39,
"grad_norm": 0.1528724581003189,
"learning_rate": 7.00410378567627e-06,
"loss": 0.5689,
"step": 1800
},
{
"epoch": 0.39,
"grad_norm": 0.1700393408536911,
"learning_rate": 7.000906729223693e-06,
"loss": 0.4934,
"step": 1801
},
{
"epoch": 0.39,
"grad_norm": 0.1635403037071228,
"learning_rate": 6.997708698419765e-06,
"loss": 0.4775,
"step": 1802
},
{
"epoch": 0.39,
"grad_norm": 0.14558027684688568,
"learning_rate": 6.994509694821784e-06,
"loss": 0.5529,
"step": 1803
},
{
"epoch": 0.39,
"grad_norm": 0.1189364641904831,
"learning_rate": 6.99130971998752e-06,
"loss": 0.5022,
"step": 1804
},
{
"epoch": 0.39,
"grad_norm": 0.17554467916488647,
"learning_rate": 6.988108775475218e-06,
"loss": 0.5326,
"step": 1805
},
{
"epoch": 0.39,
"grad_norm": 0.15480519831180573,
"learning_rate": 6.98490686284359e-06,
"loss": 0.4882,
"step": 1806
},
{
"epoch": 0.39,
"grad_norm": 0.1570086032152176,
"learning_rate": 6.981703983651827e-06,
"loss": 0.4771,
"step": 1807
},
{
"epoch": 0.39,
"grad_norm": 0.14414653182029724,
"learning_rate": 6.978500139459583e-06,
"loss": 0.4844,
"step": 1808
},
{
"epoch": 0.39,
"grad_norm": 0.181270033121109,
"learning_rate": 6.97529533182699e-06,
"loss": 0.6205,
"step": 1809
},
{
"epoch": 0.39,
"grad_norm": 0.13571658730506897,
"learning_rate": 6.972089562314644e-06,
"loss": 0.5364,
"step": 1810
},
{
"epoch": 0.39,
"grad_norm": 0.12950097024440765,
"learning_rate": 6.968882832483606e-06,
"loss": 0.5254,
"step": 1811
},
{
"epoch": 0.39,
"grad_norm": 0.15108050405979156,
"learning_rate": 6.9656751438954115e-06,
"loss": 0.5432,
"step": 1812
},
{
"epoch": 0.39,
"grad_norm": 0.1494326889514923,
"learning_rate": 6.962466498112062e-06,
"loss": 0.5615,
"step": 1813
},
{
"epoch": 0.39,
"grad_norm": 0.17007635533809662,
"learning_rate": 6.959256896696021e-06,
"loss": 0.5191,
"step": 1814
},
{
"epoch": 0.39,
"grad_norm": 0.16112545132637024,
"learning_rate": 6.956046341210221e-06,
"loss": 0.5374,
"step": 1815
},
{
"epoch": 0.39,
"grad_norm": 0.1815643608570099,
"learning_rate": 6.952834833218056e-06,
"loss": 0.5312,
"step": 1816
},
{
"epoch": 0.39,
"grad_norm": 0.14015376567840576,
"learning_rate": 6.949622374283387e-06,
"loss": 0.5012,
"step": 1817
},
{
"epoch": 0.39,
"grad_norm": 0.14989694952964783,
"learning_rate": 6.946408965970536e-06,
"loss": 0.5075,
"step": 1818
},
{
"epoch": 0.39,
"grad_norm": 0.1673702746629715,
"learning_rate": 6.943194609844288e-06,
"loss": 0.5485,
"step": 1819
},
{
"epoch": 0.39,
"grad_norm": 0.1309339702129364,
"learning_rate": 6.939979307469892e-06,
"loss": 0.5218,
"step": 1820
},
{
"epoch": 0.39,
"grad_norm": 0.1157936230301857,
"learning_rate": 6.93676306041305e-06,
"loss": 0.502,
"step": 1821
},
{
"epoch": 0.39,
"grad_norm": 0.1451912224292755,
"learning_rate": 6.933545870239933e-06,
"loss": 0.5339,
"step": 1822
},
{
"epoch": 0.39,
"grad_norm": 0.18552608788013458,
"learning_rate": 6.930327738517168e-06,
"loss": 0.4766,
"step": 1823
},
{
"epoch": 0.39,
"grad_norm": 0.1459437906742096,
"learning_rate": 6.927108666811837e-06,
"loss": 0.5381,
"step": 1824
},
{
"epoch": 0.39,
"grad_norm": 0.14324288070201874,
"learning_rate": 6.923888656691487e-06,
"loss": 0.4846,
"step": 1825
},
{
"epoch": 0.39,
"grad_norm": 0.14252141118049622,
"learning_rate": 6.920667709724113e-06,
"loss": 0.4756,
"step": 1826
},
{
"epoch": 0.39,
"grad_norm": 0.1347956657409668,
"learning_rate": 6.917445827478175e-06,
"loss": 0.5006,
"step": 1827
},
{
"epoch": 0.39,
"grad_norm": 0.17314203083515167,
"learning_rate": 6.914223011522581e-06,
"loss": 0.5711,
"step": 1828
},
{
"epoch": 0.39,
"grad_norm": 0.13734053075313568,
"learning_rate": 6.9109992634267e-06,
"loss": 0.4959,
"step": 1829
},
{
"epoch": 0.39,
"grad_norm": 0.15517868101596832,
"learning_rate": 6.90777458476035e-06,
"loss": 0.5151,
"step": 1830
},
{
"epoch": 0.39,
"grad_norm": 0.17450636625289917,
"learning_rate": 6.9045489770938045e-06,
"loss": 0.4883,
"step": 1831
},
{
"epoch": 0.39,
"grad_norm": 0.202430859208107,
"learning_rate": 6.901322441997791e-06,
"loss": 0.4894,
"step": 1832
},
{
"epoch": 0.39,
"grad_norm": 0.27107375860214233,
"learning_rate": 6.898094981043482e-06,
"loss": 0.5584,
"step": 1833
},
{
"epoch": 0.4,
"grad_norm": 0.15221843123435974,
"learning_rate": 6.894866595802509e-06,
"loss": 0.5003,
"step": 1834
},
{
"epoch": 0.4,
"grad_norm": 0.17178794741630554,
"learning_rate": 6.89163728784695e-06,
"loss": 0.548,
"step": 1835
},
{
"epoch": 0.4,
"grad_norm": 0.16640210151672363,
"learning_rate": 6.888407058749331e-06,
"loss": 0.5008,
"step": 1836
},
{
"epoch": 0.4,
"grad_norm": 0.19455331563949585,
"learning_rate": 6.885175910082631e-06,
"loss": 0.5069,
"step": 1837
},
{
"epoch": 0.4,
"grad_norm": 0.1528869867324829,
"learning_rate": 6.881943843420268e-06,
"loss": 0.5051,
"step": 1838
},
{
"epoch": 0.4,
"grad_norm": 0.16115941107273102,
"learning_rate": 6.878710860336118e-06,
"loss": 0.4924,
"step": 1839
},
{
"epoch": 0.4,
"grad_norm": 0.12841373682022095,
"learning_rate": 6.875476962404495e-06,
"loss": 0.4966,
"step": 1840
},
{
"epoch": 0.4,
"grad_norm": 0.1625949740409851,
"learning_rate": 6.8722421512001625e-06,
"loss": 0.5575,
"step": 1841
},
{
"epoch": 0.4,
"grad_norm": 0.18129919469356537,
"learning_rate": 6.869006428298328e-06,
"loss": 0.5509,
"step": 1842
},
{
"epoch": 0.4,
"grad_norm": 0.14833548665046692,
"learning_rate": 6.865769795274641e-06,
"loss": 0.5444,
"step": 1843
},
{
"epoch": 0.4,
"grad_norm": 0.14769743382930756,
"learning_rate": 6.862532253705199e-06,
"loss": 0.4723,
"step": 1844
},
{
"epoch": 0.4,
"grad_norm": 0.13029511272907257,
"learning_rate": 6.859293805166536e-06,
"loss": 0.4908,
"step": 1845
},
{
"epoch": 0.4,
"grad_norm": 0.19006066024303436,
"learning_rate": 6.85605445123563e-06,
"loss": 0.4983,
"step": 1846
},
{
"epoch": 0.4,
"grad_norm": 0.13327574729919434,
"learning_rate": 6.852814193489903e-06,
"loss": 0.5046,
"step": 1847
},
{
"epoch": 0.4,
"grad_norm": 0.16421039402484894,
"learning_rate": 6.849573033507213e-06,
"loss": 0.4845,
"step": 1848
},
{
"epoch": 0.4,
"grad_norm": 0.14652986824512482,
"learning_rate": 6.846330972865857e-06,
"loss": 0.5351,
"step": 1849
},
{
"epoch": 0.4,
"grad_norm": 0.1581708788871765,
"learning_rate": 6.843088013144575e-06,
"loss": 0.5125,
"step": 1850
},
{
"epoch": 0.4,
"grad_norm": 0.13055890798568726,
"learning_rate": 6.839844155922543e-06,
"loss": 0.4872,
"step": 1851
},
{
"epoch": 0.4,
"grad_norm": 0.17920167744159698,
"learning_rate": 6.8365994027793695e-06,
"loss": 0.5181,
"step": 1852
},
{
"epoch": 0.4,
"grad_norm": 0.16211476922035217,
"learning_rate": 6.833353755295104e-06,
"loss": 0.4617,
"step": 1853
},
{
"epoch": 0.4,
"grad_norm": 0.15161064267158508,
"learning_rate": 6.830107215050232e-06,
"loss": 0.4736,
"step": 1854
},
{
"epoch": 0.4,
"grad_norm": 0.15771758556365967,
"learning_rate": 6.826859783625674e-06,
"loss": 0.5481,
"step": 1855
},
{
"epoch": 0.4,
"grad_norm": 0.17663753032684326,
"learning_rate": 6.823611462602777e-06,
"loss": 0.562,
"step": 1856
},
{
"epoch": 0.4,
"grad_norm": 0.16153866052627563,
"learning_rate": 6.82036225356333e-06,
"loss": 0.4947,
"step": 1857
},
{
"epoch": 0.4,
"grad_norm": 0.20720727741718292,
"learning_rate": 6.817112158089554e-06,
"loss": 0.5606,
"step": 1858
},
{
"epoch": 0.4,
"grad_norm": 0.21727946400642395,
"learning_rate": 6.813861177764094e-06,
"loss": 0.5017,
"step": 1859
},
{
"epoch": 0.4,
"grad_norm": 0.2113008350133896,
"learning_rate": 6.8106093141700336e-06,
"loss": 0.5526,
"step": 1860
},
{
"epoch": 0.4,
"grad_norm": 0.16218236088752747,
"learning_rate": 6.807356568890884e-06,
"loss": 0.4807,
"step": 1861
},
{
"epoch": 0.4,
"grad_norm": 0.18519651889801025,
"learning_rate": 6.804102943510583e-06,
"loss": 0.5168,
"step": 1862
},
{
"epoch": 0.4,
"grad_norm": 0.18724150955677032,
"learning_rate": 6.800848439613504e-06,
"loss": 0.4815,
"step": 1863
},
{
"epoch": 0.4,
"grad_norm": 0.14294007420539856,
"learning_rate": 6.797593058784437e-06,
"loss": 0.5586,
"step": 1864
},
{
"epoch": 0.4,
"grad_norm": 0.159059077501297,
"learning_rate": 6.7943368026086124e-06,
"loss": 0.5098,
"step": 1865
},
{
"epoch": 0.4,
"grad_norm": 0.16052033007144928,
"learning_rate": 6.791079672671677e-06,
"loss": 0.5117,
"step": 1866
},
{
"epoch": 0.4,
"grad_norm": 0.1647024303674698,
"learning_rate": 6.787821670559705e-06,
"loss": 0.5381,
"step": 1867
},
{
"epoch": 0.4,
"grad_norm": 0.18817616999149323,
"learning_rate": 6.784562797859198e-06,
"loss": 0.4719,
"step": 1868
},
{
"epoch": 0.4,
"grad_norm": 0.18448995053768158,
"learning_rate": 6.78130305615708e-06,
"loss": 0.5259,
"step": 1869
},
{
"epoch": 0.4,
"grad_norm": 0.1643984615802765,
"learning_rate": 6.7780424470407004e-06,
"loss": 0.5437,
"step": 1870
},
{
"epoch": 0.4,
"grad_norm": 0.14963030815124512,
"learning_rate": 6.774780972097823e-06,
"loss": 0.4785,
"step": 1871
},
{
"epoch": 0.4,
"grad_norm": 0.18385331332683563,
"learning_rate": 6.771518632916645e-06,
"loss": 0.4909,
"step": 1872
},
{
"epoch": 0.4,
"grad_norm": 0.1393522322177887,
"learning_rate": 6.7682554310857755e-06,
"loss": 0.4809,
"step": 1873
},
{
"epoch": 0.4,
"grad_norm": 0.16635462641716003,
"learning_rate": 6.7649913681942455e-06,
"loss": 0.5425,
"step": 1874
},
{
"epoch": 0.4,
"grad_norm": 0.15144184231758118,
"learning_rate": 6.761726445831511e-06,
"loss": 0.5033,
"step": 1875
},
{
"epoch": 0.4,
"grad_norm": 0.17777347564697266,
"learning_rate": 6.758460665587437e-06,
"loss": 0.5561,
"step": 1876
},
{
"epoch": 0.4,
"grad_norm": 0.2699100375175476,
"learning_rate": 6.755194029052313e-06,
"loss": 0.5314,
"step": 1877
},
{
"epoch": 0.4,
"grad_norm": 0.17995339632034302,
"learning_rate": 6.751926537816846e-06,
"loss": 0.5097,
"step": 1878
},
{
"epoch": 0.4,
"grad_norm": 0.14517782628536224,
"learning_rate": 6.748658193472155e-06,
"loss": 0.524,
"step": 1879
},
{
"epoch": 0.4,
"grad_norm": 0.14715701341629028,
"learning_rate": 6.745388997609774e-06,
"loss": 0.5633,
"step": 1880
},
{
"epoch": 0.41,
"grad_norm": 0.16807079315185547,
"learning_rate": 6.7421189518216576e-06,
"loss": 0.5106,
"step": 1881
},
{
"epoch": 0.41,
"grad_norm": 0.1584351658821106,
"learning_rate": 6.738848057700169e-06,
"loss": 0.5602,
"step": 1882
},
{
"epoch": 0.41,
"grad_norm": 0.16300451755523682,
"learning_rate": 6.735576316838087e-06,
"loss": 0.5455,
"step": 1883
},
{
"epoch": 0.41,
"grad_norm": 0.16324667632579803,
"learning_rate": 6.732303730828601e-06,
"loss": 0.5247,
"step": 1884
},
{
"epoch": 0.41,
"grad_norm": 0.16887761652469635,
"learning_rate": 6.7290303012653136e-06,
"loss": 0.4953,
"step": 1885
},
{
"epoch": 0.41,
"grad_norm": 0.1934385746717453,
"learning_rate": 6.725756029742234e-06,
"loss": 0.4727,
"step": 1886
},
{
"epoch": 0.41,
"grad_norm": 0.17485982179641724,
"learning_rate": 6.7224809178537894e-06,
"loss": 0.5003,
"step": 1887
},
{
"epoch": 0.41,
"grad_norm": 0.14065895974636078,
"learning_rate": 6.7192049671948115e-06,
"loss": 0.4841,
"step": 1888
},
{
"epoch": 0.41,
"grad_norm": 0.12996014952659607,
"learning_rate": 6.715928179360538e-06,
"loss": 0.4906,
"step": 1889
},
{
"epoch": 0.41,
"grad_norm": 0.14599494636058807,
"learning_rate": 6.712650555946616e-06,
"loss": 0.5114,
"step": 1890
},
{
"epoch": 0.41,
"grad_norm": 0.1689714789390564,
"learning_rate": 6.709372098549104e-06,
"loss": 0.5318,
"step": 1891
},
{
"epoch": 0.41,
"grad_norm": 0.14123961329460144,
"learning_rate": 6.706092808764459e-06,
"loss": 0.5013,
"step": 1892
},
{
"epoch": 0.41,
"grad_norm": 0.14629031717777252,
"learning_rate": 6.702812688189551e-06,
"loss": 0.5524,
"step": 1893
},
{
"epoch": 0.41,
"grad_norm": 0.1583494246006012,
"learning_rate": 6.699531738421648e-06,
"loss": 0.5285,
"step": 1894
},
{
"epoch": 0.41,
"grad_norm": 0.17046624422073364,
"learning_rate": 6.696249961058426e-06,
"loss": 0.5125,
"step": 1895
},
{
"epoch": 0.41,
"grad_norm": 0.1436389535665512,
"learning_rate": 6.692967357697961e-06,
"loss": 0.5045,
"step": 1896
},
{
"epoch": 0.41,
"grad_norm": 0.18508578836917877,
"learning_rate": 6.689683929938736e-06,
"loss": 0.5401,
"step": 1897
},
{
"epoch": 0.41,
"grad_norm": 0.1609339416027069,
"learning_rate": 6.6863996793796286e-06,
"loss": 0.5026,
"step": 1898
},
{
"epoch": 0.41,
"grad_norm": 0.17639221251010895,
"learning_rate": 6.683114607619923e-06,
"loss": 0.5563,
"step": 1899
},
{
"epoch": 0.41,
"grad_norm": 0.15782758593559265,
"learning_rate": 6.6798287162593e-06,
"loss": 0.5344,
"step": 1900
},
{
"epoch": 0.41,
"grad_norm": 0.14880798757076263,
"learning_rate": 6.676542006897842e-06,
"loss": 0.4987,
"step": 1901
},
{
"epoch": 0.41,
"grad_norm": 0.18628853559494019,
"learning_rate": 6.6732544811360255e-06,
"loss": 0.4961,
"step": 1902
},
{
"epoch": 0.41,
"grad_norm": 0.18380938470363617,
"learning_rate": 6.669966140574729e-06,
"loss": 0.5529,
"step": 1903
},
{
"epoch": 0.41,
"grad_norm": 0.18866044282913208,
"learning_rate": 6.666676986815227e-06,
"loss": 0.5462,
"step": 1904
},
{
"epoch": 0.41,
"grad_norm": 0.16578936576843262,
"learning_rate": 6.663387021459187e-06,
"loss": 0.513,
"step": 1905
},
{
"epoch": 0.41,
"grad_norm": 0.18033047020435333,
"learning_rate": 6.660096246108677e-06,
"loss": 0.4892,
"step": 1906
},
{
"epoch": 0.41,
"grad_norm": 0.16443459689617157,
"learning_rate": 6.656804662366153e-06,
"loss": 0.5372,
"step": 1907
},
{
"epoch": 0.41,
"grad_norm": 0.14939545094966888,
"learning_rate": 6.653512271834468e-06,
"loss": 0.5273,
"step": 1908
},
{
"epoch": 0.41,
"grad_norm": 0.17759068310260773,
"learning_rate": 6.650219076116868e-06,
"loss": 0.4714,
"step": 1909
},
{
"epoch": 0.41,
"grad_norm": 0.1866803765296936,
"learning_rate": 6.646925076816994e-06,
"loss": 0.5261,
"step": 1910
},
{
"epoch": 0.41,
"grad_norm": 0.15621764957904816,
"learning_rate": 6.643630275538871e-06,
"loss": 0.521,
"step": 1911
},
{
"epoch": 0.41,
"grad_norm": 0.20561483502388,
"learning_rate": 6.640334673886921e-06,
"loss": 0.531,
"step": 1912
},
{
"epoch": 0.41,
"grad_norm": 0.1349986344575882,
"learning_rate": 6.637038273465952e-06,
"loss": 0.5328,
"step": 1913
},
{
"epoch": 0.41,
"grad_norm": 0.1595732718706131,
"learning_rate": 6.633741075881163e-06,
"loss": 0.5151,
"step": 1914
},
{
"epoch": 0.41,
"grad_norm": 0.15593409538269043,
"learning_rate": 6.63044308273814e-06,
"loss": 0.5507,
"step": 1915
},
{
"epoch": 0.41,
"grad_norm": 0.1654960662126541,
"learning_rate": 6.627144295642859e-06,
"loss": 0.5172,
"step": 1916
},
{
"epoch": 0.41,
"grad_norm": 0.13034138083457947,
"learning_rate": 6.6238447162016786e-06,
"loss": 0.561,
"step": 1917
},
{
"epoch": 0.41,
"grad_norm": 0.14604593813419342,
"learning_rate": 6.6205443460213445e-06,
"loss": 0.5173,
"step": 1918
},
{
"epoch": 0.41,
"grad_norm": 0.18159790337085724,
"learning_rate": 6.617243186708989e-06,
"loss": 0.5295,
"step": 1919
},
{
"epoch": 0.41,
"grad_norm": 0.1321515291929245,
"learning_rate": 6.613941239872129e-06,
"loss": 0.4762,
"step": 1920
},
{
"epoch": 0.41,
"grad_norm": 0.13790853321552277,
"learning_rate": 6.610638507118663e-06,
"loss": 0.5172,
"step": 1921
},
{
"epoch": 0.41,
"grad_norm": 0.15198110044002533,
"learning_rate": 6.607334990056873e-06,
"loss": 0.5019,
"step": 1922
},
{
"epoch": 0.41,
"grad_norm": 0.1440410614013672,
"learning_rate": 6.604030690295422e-06,
"loss": 0.481,
"step": 1923
},
{
"epoch": 0.41,
"grad_norm": 0.23520071804523468,
"learning_rate": 6.600725609443356e-06,
"loss": 0.4935,
"step": 1924
},
{
"epoch": 0.41,
"grad_norm": 0.12442398816347122,
"learning_rate": 6.597419749110099e-06,
"loss": 0.5067,
"step": 1925
},
{
"epoch": 0.41,
"grad_norm": 0.19941824674606323,
"learning_rate": 6.594113110905458e-06,
"loss": 0.5489,
"step": 1926
},
{
"epoch": 0.42,
"grad_norm": 0.16936185956001282,
"learning_rate": 6.5908056964396135e-06,
"loss": 0.5173,
"step": 1927
},
{
"epoch": 0.42,
"grad_norm": 0.1414109170436859,
"learning_rate": 6.587497507323132e-06,
"loss": 0.4946,
"step": 1928
},
{
"epoch": 0.42,
"grad_norm": 0.1461210548877716,
"learning_rate": 6.584188545166948e-06,
"loss": 0.5585,
"step": 1929
},
{
"epoch": 0.42,
"grad_norm": 0.14086653292179108,
"learning_rate": 6.580878811582379e-06,
"loss": 0.5138,
"step": 1930
},
{
"epoch": 0.42,
"grad_norm": 0.13712497055530548,
"learning_rate": 6.5775683081811144e-06,
"loss": 0.5223,
"step": 1931
},
{
"epoch": 0.42,
"grad_norm": 0.18051303923130035,
"learning_rate": 6.574257036575224e-06,
"loss": 0.5229,
"step": 1932
},
{
"epoch": 0.42,
"grad_norm": 0.18365350365638733,
"learning_rate": 6.5709449983771414e-06,
"loss": 0.5357,
"step": 1933
},
{
"epoch": 0.42,
"grad_norm": 0.1633131057024002,
"learning_rate": 6.567632195199686e-06,
"loss": 0.5919,
"step": 1934
},
{
"epoch": 0.42,
"grad_norm": 0.18704870343208313,
"learning_rate": 6.564318628656039e-06,
"loss": 0.5212,
"step": 1935
},
{
"epoch": 0.42,
"grad_norm": 0.15724125504493713,
"learning_rate": 6.5610043003597615e-06,
"loss": 0.5219,
"step": 1936
},
{
"epoch": 0.42,
"grad_norm": 0.14116469025611877,
"learning_rate": 6.557689211924779e-06,
"loss": 0.5133,
"step": 1937
},
{
"epoch": 0.42,
"grad_norm": 0.20150695741176605,
"learning_rate": 6.554373364965392e-06,
"loss": 0.5256,
"step": 1938
},
{
"epoch": 0.42,
"grad_norm": 0.18280090391635895,
"learning_rate": 6.551056761096269e-06,
"loss": 0.5481,
"step": 1939
},
{
"epoch": 0.42,
"grad_norm": 0.18789951503276825,
"learning_rate": 6.547739401932443e-06,
"loss": 0.4974,
"step": 1940
},
{
"epoch": 0.42,
"grad_norm": 0.15406067669391632,
"learning_rate": 6.544421289089321e-06,
"loss": 0.543,
"step": 1941
},
{
"epoch": 0.42,
"grad_norm": 0.16543880105018616,
"learning_rate": 6.541102424182676e-06,
"loss": 0.5503,
"step": 1942
},
{
"epoch": 0.42,
"grad_norm": 0.17979435622692108,
"learning_rate": 6.537782808828641e-06,
"loss": 0.5514,
"step": 1943
},
{
"epoch": 0.42,
"grad_norm": 0.19799616932868958,
"learning_rate": 6.5344624446437234e-06,
"loss": 0.499,
"step": 1944
},
{
"epoch": 0.42,
"grad_norm": 0.16152727603912354,
"learning_rate": 6.531141333244789e-06,
"loss": 0.5483,
"step": 1945
},
{
"epoch": 0.42,
"grad_norm": 0.16674454510211945,
"learning_rate": 6.527819476249066e-06,
"loss": 0.5127,
"step": 1946
},
{
"epoch": 0.42,
"grad_norm": 0.16409684717655182,
"learning_rate": 6.5244968752741555e-06,
"loss": 0.5407,
"step": 1947
},
{
"epoch": 0.42,
"grad_norm": 0.1826597899198532,
"learning_rate": 6.521173531938011e-06,
"loss": 0.446,
"step": 1948
},
{
"epoch": 0.42,
"grad_norm": 0.17517463862895966,
"learning_rate": 6.517849447858951e-06,
"loss": 0.5539,
"step": 1949
},
{
"epoch": 0.42,
"grad_norm": 0.14857599139213562,
"learning_rate": 6.514524624655654e-06,
"loss": 0.5278,
"step": 1950
},
{
"epoch": 0.42,
"grad_norm": 0.13251933455467224,
"learning_rate": 6.511199063947159e-06,
"loss": 0.4874,
"step": 1951
},
{
"epoch": 0.42,
"grad_norm": 0.138553187251091,
"learning_rate": 6.507872767352863e-06,
"loss": 0.5654,
"step": 1952
},
{
"epoch": 0.42,
"grad_norm": 0.13305741548538208,
"learning_rate": 6.504545736492526e-06,
"loss": 0.5318,
"step": 1953
},
{
"epoch": 0.42,
"grad_norm": 0.14779391884803772,
"learning_rate": 6.50121797298626e-06,
"loss": 0.5017,
"step": 1954
},
{
"epoch": 0.42,
"grad_norm": 0.1407061219215393,
"learning_rate": 6.497889478454534e-06,
"loss": 0.4967,
"step": 1955
},
{
"epoch": 0.42,
"grad_norm": 0.14632262289524078,
"learning_rate": 6.494560254518179e-06,
"loss": 0.4989,
"step": 1956
},
{
"epoch": 0.42,
"grad_norm": 0.2105487734079361,
"learning_rate": 6.491230302798372e-06,
"loss": 0.5095,
"step": 1957
},
{
"epoch": 0.42,
"grad_norm": 0.15186044573783875,
"learning_rate": 6.487899624916654e-06,
"loss": 0.5069,
"step": 1958
},
{
"epoch": 0.42,
"grad_norm": 0.15018121898174286,
"learning_rate": 6.484568222494911e-06,
"loss": 0.5031,
"step": 1959
},
{
"epoch": 0.42,
"grad_norm": 0.15453185141086578,
"learning_rate": 6.481236097155389e-06,
"loss": 0.513,
"step": 1960
},
{
"epoch": 0.42,
"grad_norm": 0.14290063083171844,
"learning_rate": 6.47790325052068e-06,
"loss": 0.4524,
"step": 1961
},
{
"epoch": 0.42,
"grad_norm": 0.17694444954395294,
"learning_rate": 6.4745696842137305e-06,
"loss": 0.5628,
"step": 1962
},
{
"epoch": 0.42,
"grad_norm": 0.15745702385902405,
"learning_rate": 6.4712353998578396e-06,
"loss": 0.5302,
"step": 1963
},
{
"epoch": 0.42,
"grad_norm": 0.1511646956205368,
"learning_rate": 6.467900399076651e-06,
"loss": 0.5041,
"step": 1964
},
{
"epoch": 0.42,
"grad_norm": 0.15910549461841583,
"learning_rate": 6.46456468349416e-06,
"loss": 0.5193,
"step": 1965
},
{
"epoch": 0.42,
"grad_norm": 0.15061886608600616,
"learning_rate": 6.461228254734711e-06,
"loss": 0.48,
"step": 1966
},
{
"epoch": 0.42,
"grad_norm": 0.1490405946969986,
"learning_rate": 6.4578911144229915e-06,
"loss": 0.4894,
"step": 1967
},
{
"epoch": 0.42,
"grad_norm": 0.13372862339019775,
"learning_rate": 6.454553264184041e-06,
"loss": 0.5259,
"step": 1968
},
{
"epoch": 0.42,
"grad_norm": 0.15754102170467377,
"learning_rate": 6.451214705643241e-06,
"loss": 0.5001,
"step": 1969
},
{
"epoch": 0.42,
"grad_norm": 0.17153845727443695,
"learning_rate": 6.447875440426319e-06,
"loss": 0.5492,
"step": 1970
},
{
"epoch": 0.42,
"grad_norm": 0.1674170047044754,
"learning_rate": 6.444535470159346e-06,
"loss": 0.5032,
"step": 1971
},
{
"epoch": 0.42,
"grad_norm": 0.12836651504039764,
"learning_rate": 6.441194796468739e-06,
"loss": 0.4732,
"step": 1972
},
{
"epoch": 0.43,
"grad_norm": 0.1573239266872406,
"learning_rate": 6.437853420981254e-06,
"loss": 0.4972,
"step": 1973
},
{
"epoch": 0.43,
"grad_norm": 0.18357399106025696,
"learning_rate": 6.434511345323988e-06,
"loss": 0.5351,
"step": 1974
},
{
"epoch": 0.43,
"grad_norm": 0.14846058189868927,
"learning_rate": 6.431168571124387e-06,
"loss": 0.4689,
"step": 1975
},
{
"epoch": 0.43,
"grad_norm": 0.14961528778076172,
"learning_rate": 6.427825100010225e-06,
"loss": 0.5394,
"step": 1976
},
{
"epoch": 0.43,
"grad_norm": 0.2099412977695465,
"learning_rate": 6.424480933609626e-06,
"loss": 0.5802,
"step": 1977
},
{
"epoch": 0.43,
"grad_norm": 0.1339603066444397,
"learning_rate": 6.421136073551047e-06,
"loss": 0.499,
"step": 1978
},
{
"epoch": 0.43,
"grad_norm": 0.1474086493253708,
"learning_rate": 6.417790521463282e-06,
"loss": 0.511,
"step": 1979
},
{
"epoch": 0.43,
"grad_norm": 0.13013140857219696,
"learning_rate": 6.414444278975465e-06,
"loss": 0.5215,
"step": 1980
},
{
"epoch": 0.43,
"grad_norm": 0.14738723635673523,
"learning_rate": 6.411097347717068e-06,
"loss": 0.5079,
"step": 1981
},
{
"epoch": 0.43,
"grad_norm": 0.18411760032176971,
"learning_rate": 6.407749729317892e-06,
"loss": 0.51,
"step": 1982
},
{
"epoch": 0.43,
"grad_norm": 0.15733294188976288,
"learning_rate": 6.404401425408079e-06,
"loss": 0.5247,
"step": 1983
},
{
"epoch": 0.43,
"grad_norm": 0.1328936368227005,
"learning_rate": 6.401052437618098e-06,
"loss": 0.5223,
"step": 1984
},
{
"epoch": 0.43,
"grad_norm": 0.15146395564079285,
"learning_rate": 6.397702767578761e-06,
"loss": 0.5676,
"step": 1985
},
{
"epoch": 0.43,
"grad_norm": 0.1269007921218872,
"learning_rate": 6.394352416921201e-06,
"loss": 0.47,
"step": 1986
},
{
"epoch": 0.43,
"grad_norm": 0.15181781351566315,
"learning_rate": 6.39100138727689e-06,
"loss": 0.56,
"step": 1987
},
{
"epoch": 0.43,
"grad_norm": 0.1406852751970291,
"learning_rate": 6.387649680277629e-06,
"loss": 0.5753,
"step": 1988
},
{
"epoch": 0.43,
"grad_norm": 0.2074470818042755,
"learning_rate": 6.384297297555546e-06,
"loss": 0.528,
"step": 1989
},
{
"epoch": 0.43,
"grad_norm": 0.15589666366577148,
"learning_rate": 6.380944240743101e-06,
"loss": 0.5103,
"step": 1990
},
{
"epoch": 0.43,
"grad_norm": 0.156142920255661,
"learning_rate": 6.377590511473083e-06,
"loss": 0.5082,
"step": 1991
},
{
"epoch": 0.43,
"grad_norm": 0.18364138901233673,
"learning_rate": 6.374236111378605e-06,
"loss": 0.5319,
"step": 1992
},
{
"epoch": 0.43,
"grad_norm": 0.13717058300971985,
"learning_rate": 6.37088104209311e-06,
"loss": 0.5207,
"step": 1993
},
{
"epoch": 0.43,
"grad_norm": 0.1605088859796524,
"learning_rate": 6.3675253052503645e-06,
"loss": 0.4823,
"step": 1994
},
{
"epoch": 0.43,
"grad_norm": 0.13547933101654053,
"learning_rate": 6.364168902484461e-06,
"loss": 0.5081,
"step": 1995
},
{
"epoch": 0.43,
"grad_norm": 0.1631360799074173,
"learning_rate": 6.360811835429817e-06,
"loss": 0.5494,
"step": 1996
},
{
"epoch": 0.43,
"grad_norm": 0.15566737949848175,
"learning_rate": 6.357454105721171e-06,
"loss": 0.5708,
"step": 1997
},
{
"epoch": 0.43,
"grad_norm": 0.18726012110710144,
"learning_rate": 6.35409571499359e-06,
"loss": 0.524,
"step": 1998
},
{
"epoch": 0.43,
"grad_norm": 0.18683874607086182,
"learning_rate": 6.350736664882454e-06,
"loss": 0.477,
"step": 1999
},
{
"epoch": 0.43,
"grad_norm": 0.15933635830879211,
"learning_rate": 6.347376957023471e-06,
"loss": 0.5524,
"step": 2000
},
{
"epoch": 0.43,
"grad_norm": 0.16675737500190735,
"learning_rate": 6.344016593052669e-06,
"loss": 0.5126,
"step": 2001
},
{
"epoch": 0.43,
"grad_norm": 0.22275328636169434,
"learning_rate": 6.340655574606391e-06,
"loss": 0.5203,
"step": 2002
},
{
"epoch": 0.43,
"grad_norm": 0.1311800628900528,
"learning_rate": 6.337293903321303e-06,
"loss": 0.5132,
"step": 2003
},
{
"epoch": 0.43,
"grad_norm": 0.12225490063428879,
"learning_rate": 6.333931580834387e-06,
"loss": 0.5529,
"step": 2004
},
{
"epoch": 0.43,
"grad_norm": 0.14834477007389069,
"learning_rate": 6.330568608782941e-06,
"loss": 0.5045,
"step": 2005
},
{
"epoch": 0.43,
"grad_norm": 0.13984233140945435,
"learning_rate": 6.327204988804583e-06,
"loss": 0.5398,
"step": 2006
},
{
"epoch": 0.43,
"grad_norm": 0.13225583732128143,
"learning_rate": 6.323840722537243e-06,
"loss": 0.5065,
"step": 2007
},
{
"epoch": 0.43,
"grad_norm": 0.16569088399410248,
"learning_rate": 6.320475811619167e-06,
"loss": 0.529,
"step": 2008
},
{
"epoch": 0.43,
"grad_norm": 0.20376458764076233,
"learning_rate": 6.317110257688917e-06,
"loss": 0.47,
"step": 2009
},
{
"epoch": 0.43,
"grad_norm": 0.20211917161941528,
"learning_rate": 6.313744062385363e-06,
"loss": 0.5044,
"step": 2010
},
{
"epoch": 0.43,
"grad_norm": 0.1894192099571228,
"learning_rate": 6.31037722734769e-06,
"loss": 0.574,
"step": 2011
},
{
"epoch": 0.43,
"grad_norm": 0.14667464792728424,
"learning_rate": 6.307009754215397e-06,
"loss": 0.5502,
"step": 2012
},
{
"epoch": 0.43,
"grad_norm": 0.17428962886333466,
"learning_rate": 6.303641644628291e-06,
"loss": 0.5423,
"step": 2013
},
{
"epoch": 0.43,
"grad_norm": 0.1584947109222412,
"learning_rate": 6.300272900226491e-06,
"loss": 0.4784,
"step": 2014
},
{
"epoch": 0.43,
"grad_norm": 0.14651672542095184,
"learning_rate": 6.296903522650419e-06,
"loss": 0.4896,
"step": 2015
},
{
"epoch": 0.43,
"grad_norm": 0.13722088932991028,
"learning_rate": 6.2935335135408135e-06,
"loss": 0.4324,
"step": 2016
},
{
"epoch": 0.43,
"grad_norm": 0.16364432871341705,
"learning_rate": 6.290162874538718e-06,
"loss": 0.5051,
"step": 2017
},
{
"epoch": 0.43,
"grad_norm": 0.17197778820991516,
"learning_rate": 6.286791607285478e-06,
"loss": 0.4707,
"step": 2018
},
{
"epoch": 0.43,
"grad_norm": 0.19821661710739136,
"learning_rate": 6.283419713422754e-06,
"loss": 0.5365,
"step": 2019
},
{
"epoch": 0.44,
"grad_norm": 0.18750454485416412,
"learning_rate": 6.2800471945925e-06,
"loss": 0.5813,
"step": 2020
},
{
"epoch": 0.44,
"grad_norm": 0.15011686086654663,
"learning_rate": 6.276674052436984e-06,
"loss": 0.4686,
"step": 2021
},
{
"epoch": 0.44,
"grad_norm": 0.14810575544834137,
"learning_rate": 6.2733002885987734e-06,
"loss": 0.5666,
"step": 2022
},
{
"epoch": 0.44,
"grad_norm": 0.15707622468471527,
"learning_rate": 6.26992590472074e-06,
"loss": 0.4939,
"step": 2023
},
{
"epoch": 0.44,
"grad_norm": 0.16260173916816711,
"learning_rate": 6.2665509024460554e-06,
"loss": 0.5063,
"step": 2024
},
{
"epoch": 0.44,
"grad_norm": 0.14994855225086212,
"learning_rate": 6.263175283418196e-06,
"loss": 0.4813,
"step": 2025
},
{
"epoch": 0.44,
"grad_norm": 0.16885532438755035,
"learning_rate": 6.259799049280932e-06,
"loss": 0.5239,
"step": 2026
},
{
"epoch": 0.44,
"grad_norm": 0.1944415271282196,
"learning_rate": 6.256422201678341e-06,
"loss": 0.4999,
"step": 2027
},
{
"epoch": 0.44,
"grad_norm": 0.16358405351638794,
"learning_rate": 6.253044742254791e-06,
"loss": 0.532,
"step": 2028
},
{
"epoch": 0.44,
"grad_norm": 0.184137225151062,
"learning_rate": 6.249666672654958e-06,
"loss": 0.4797,
"step": 2029
},
{
"epoch": 0.44,
"grad_norm": 0.18166375160217285,
"learning_rate": 6.246287994523805e-06,
"loss": 0.5129,
"step": 2030
},
{
"epoch": 0.44,
"grad_norm": 0.13478122651576996,
"learning_rate": 6.242908709506599e-06,
"loss": 0.4996,
"step": 2031
},
{
"epoch": 0.44,
"grad_norm": 0.1508375108242035,
"learning_rate": 6.239528819248898e-06,
"loss": 0.4822,
"step": 2032
},
{
"epoch": 0.44,
"grad_norm": 0.14239796996116638,
"learning_rate": 6.236148325396555e-06,
"loss": 0.5381,
"step": 2033
},
{
"epoch": 0.44,
"grad_norm": 0.13590578734874725,
"learning_rate": 6.232767229595719e-06,
"loss": 0.5076,
"step": 2034
},
{
"epoch": 0.44,
"grad_norm": 0.1495681256055832,
"learning_rate": 6.229385533492833e-06,
"loss": 0.5012,
"step": 2035
},
{
"epoch": 0.44,
"grad_norm": 0.11667856574058533,
"learning_rate": 6.226003238734628e-06,
"loss": 0.5408,
"step": 2036
},
{
"epoch": 0.44,
"grad_norm": 0.12598071992397308,
"learning_rate": 6.222620346968131e-06,
"loss": 0.4822,
"step": 2037
},
{
"epoch": 0.44,
"grad_norm": 0.18622703850269318,
"learning_rate": 6.219236859840656e-06,
"loss": 0.5583,
"step": 2038
},
{
"epoch": 0.44,
"grad_norm": 0.15623895823955536,
"learning_rate": 6.21585277899981e-06,
"loss": 0.5272,
"step": 2039
},
{
"epoch": 0.44,
"grad_norm": 0.15245303511619568,
"learning_rate": 6.2124681060934866e-06,
"loss": 0.5504,
"step": 2040
},
{
"epoch": 0.44,
"grad_norm": 0.2059142142534256,
"learning_rate": 6.2090828427698706e-06,
"loss": 0.5196,
"step": 2041
},
{
"epoch": 0.44,
"grad_norm": 0.14754840731620789,
"learning_rate": 6.205696990677431e-06,
"loss": 0.5198,
"step": 2042
},
{
"epoch": 0.44,
"grad_norm": 0.14195892214775085,
"learning_rate": 6.202310551464924e-06,
"loss": 0.523,
"step": 2043
},
{
"epoch": 0.44,
"grad_norm": 0.17063148319721222,
"learning_rate": 6.1989235267813964e-06,
"loss": 0.5115,
"step": 2044
},
{
"epoch": 0.44,
"grad_norm": 0.1315128356218338,
"learning_rate": 6.1955359182761745e-06,
"loss": 0.5535,
"step": 2045
},
{
"epoch": 0.44,
"grad_norm": 0.26819273829460144,
"learning_rate": 6.192147727598869e-06,
"loss": 0.4942,
"step": 2046
},
{
"epoch": 0.44,
"grad_norm": 0.15203434228897095,
"learning_rate": 6.188758956399379e-06,
"loss": 0.5349,
"step": 2047
},
{
"epoch": 0.44,
"grad_norm": 0.17396771907806396,
"learning_rate": 6.185369606327882e-06,
"loss": 0.5134,
"step": 2048
},
{
"epoch": 0.44,
"grad_norm": 0.14054559171199799,
"learning_rate": 6.1819796790348376e-06,
"loss": 0.5346,
"step": 2049
},
{
"epoch": 0.44,
"grad_norm": 0.13480958342552185,
"learning_rate": 6.178589176170991e-06,
"loss": 0.4995,
"step": 2050
},
{
"epoch": 0.44,
"grad_norm": 0.15606021881103516,
"learning_rate": 6.175198099387361e-06,
"loss": 0.5519,
"step": 2051
},
{
"epoch": 0.44,
"grad_norm": 0.14711807668209076,
"learning_rate": 6.171806450335248e-06,
"loss": 0.5303,
"step": 2052
},
{
"epoch": 0.44,
"grad_norm": 0.18359160423278809,
"learning_rate": 6.1684142306662366e-06,
"loss": 0.5784,
"step": 2053
},
{
"epoch": 0.44,
"grad_norm": 0.15108604729175568,
"learning_rate": 6.16502144203218e-06,
"loss": 0.5499,
"step": 2054
},
{
"epoch": 0.44,
"grad_norm": 0.12765131890773773,
"learning_rate": 6.161628086085218e-06,
"loss": 0.5531,
"step": 2055
},
{
"epoch": 0.44,
"grad_norm": 0.18855132162570953,
"learning_rate": 6.1582341644777575e-06,
"loss": 0.5236,
"step": 2056
},
{
"epoch": 0.44,
"grad_norm": 0.14612235128879547,
"learning_rate": 6.15483967886249e-06,
"loss": 0.5035,
"step": 2057
},
{
"epoch": 0.44,
"grad_norm": 0.1928872913122177,
"learning_rate": 6.151444630892372e-06,
"loss": 0.541,
"step": 2058
},
{
"epoch": 0.44,
"grad_norm": 0.16574794054031372,
"learning_rate": 6.1480490222206415e-06,
"loss": 0.5139,
"step": 2059
},
{
"epoch": 0.44,
"grad_norm": 0.17566706240177155,
"learning_rate": 6.144652854500806e-06,
"loss": 0.4495,
"step": 2060
},
{
"epoch": 0.44,
"grad_norm": 0.17141076922416687,
"learning_rate": 6.1412561293866455e-06,
"loss": 0.5434,
"step": 2061
},
{
"epoch": 0.44,
"grad_norm": 0.16970355808734894,
"learning_rate": 6.1378588485322145e-06,
"loss": 0.5635,
"step": 2062
},
{
"epoch": 0.44,
"grad_norm": 0.20742008090019226,
"learning_rate": 6.134461013591832e-06,
"loss": 0.5435,
"step": 2063
},
{
"epoch": 0.44,
"grad_norm": 0.1773451417684555,
"learning_rate": 6.131062626220094e-06,
"loss": 0.5539,
"step": 2064
},
{
"epoch": 0.44,
"grad_norm": 0.18251217901706696,
"learning_rate": 6.127663688071859e-06,
"loss": 0.5046,
"step": 2065
},
{
"epoch": 0.45,
"grad_norm": 0.19838100671768188,
"learning_rate": 6.124264200802259e-06,
"loss": 0.4714,
"step": 2066
},
{
"epoch": 0.45,
"grad_norm": 0.154763326048851,
"learning_rate": 6.120864166066689e-06,
"loss": 0.528,
"step": 2067
},
{
"epoch": 0.45,
"grad_norm": 0.19701255857944489,
"learning_rate": 6.117463585520813e-06,
"loss": 0.5295,
"step": 2068
},
{
"epoch": 0.45,
"grad_norm": 0.17150332033634186,
"learning_rate": 6.1140624608205626e-06,
"loss": 0.4896,
"step": 2069
},
{
"epoch": 0.45,
"grad_norm": 0.1474120020866394,
"learning_rate": 6.110660793622127e-06,
"loss": 0.5046,
"step": 2070
},
{
"epoch": 0.45,
"grad_norm": 0.18776945769786835,
"learning_rate": 6.10725858558197e-06,
"loss": 0.5263,
"step": 2071
},
{
"epoch": 0.45,
"grad_norm": 0.14684580266475677,
"learning_rate": 6.103855838356813e-06,
"loss": 0.5539,
"step": 2072
},
{
"epoch": 0.45,
"grad_norm": 0.12644240260124207,
"learning_rate": 6.100452553603638e-06,
"loss": 0.5047,
"step": 2073
},
{
"epoch": 0.45,
"grad_norm": 0.18356040120124817,
"learning_rate": 6.097048732979691e-06,
"loss": 0.5408,
"step": 2074
},
{
"epoch": 0.45,
"grad_norm": 0.13573047518730164,
"learning_rate": 6.093644378142481e-06,
"loss": 0.5369,
"step": 2075
},
{
"epoch": 0.45,
"grad_norm": 0.1704436093568802,
"learning_rate": 6.090239490749775e-06,
"loss": 0.4905,
"step": 2076
},
{
"epoch": 0.45,
"grad_norm": 0.1508268564939499,
"learning_rate": 6.086834072459599e-06,
"loss": 0.5288,
"step": 2077
},
{
"epoch": 0.45,
"grad_norm": 0.17939120531082153,
"learning_rate": 6.083428124930239e-06,
"loss": 0.5089,
"step": 2078
},
{
"epoch": 0.45,
"grad_norm": 0.1567559689283371,
"learning_rate": 6.080021649820238e-06,
"loss": 0.4933,
"step": 2079
},
{
"epoch": 0.45,
"grad_norm": 0.1430431753396988,
"learning_rate": 6.076614648788392e-06,
"loss": 0.5396,
"step": 2080
},
{
"epoch": 0.45,
"grad_norm": 0.15456099808216095,
"learning_rate": 6.073207123493763e-06,
"loss": 0.4786,
"step": 2081
},
{
"epoch": 0.45,
"grad_norm": 0.17080536484718323,
"learning_rate": 6.069799075595658e-06,
"loss": 0.5233,
"step": 2082
},
{
"epoch": 0.45,
"grad_norm": 0.13564909994602203,
"learning_rate": 6.066390506753644e-06,
"loss": 0.5682,
"step": 2083
},
{
"epoch": 0.45,
"grad_norm": 0.15913358330726624,
"learning_rate": 6.062981418627539e-06,
"loss": 0.5222,
"step": 2084
},
{
"epoch": 0.45,
"grad_norm": 0.16424204409122467,
"learning_rate": 6.059571812877419e-06,
"loss": 0.5062,
"step": 2085
},
{
"epoch": 0.45,
"grad_norm": 0.16678033769130707,
"learning_rate": 6.0561616911636025e-06,
"loss": 0.5138,
"step": 2086
},
{
"epoch": 0.45,
"grad_norm": 0.15992575883865356,
"learning_rate": 6.052751055146669e-06,
"loss": 0.5199,
"step": 2087
},
{
"epoch": 0.45,
"grad_norm": 0.18692535161972046,
"learning_rate": 6.049339906487443e-06,
"loss": 0.5434,
"step": 2088
},
{
"epoch": 0.45,
"grad_norm": 0.13587631285190582,
"learning_rate": 6.045928246847003e-06,
"loss": 0.5013,
"step": 2089
},
{
"epoch": 0.45,
"grad_norm": 0.20116516947746277,
"learning_rate": 6.042516077886669e-06,
"loss": 0.5329,
"step": 2090
},
{
"epoch": 0.45,
"grad_norm": 0.13471555709838867,
"learning_rate": 6.039103401268016e-06,
"loss": 0.4862,
"step": 2091
},
{
"epoch": 0.45,
"grad_norm": 0.15407685935497284,
"learning_rate": 6.035690218652861e-06,
"loss": 0.6036,
"step": 2092
},
{
"epoch": 0.45,
"grad_norm": 0.14876054227352142,
"learning_rate": 6.032276531703274e-06,
"loss": 0.4963,
"step": 2093
},
{
"epoch": 0.45,
"grad_norm": 0.16624298691749573,
"learning_rate": 6.028862342081564e-06,
"loss": 0.5164,
"step": 2094
},
{
"epoch": 0.45,
"grad_norm": 0.15883252024650574,
"learning_rate": 6.025447651450289e-06,
"loss": 0.5082,
"step": 2095
},
{
"epoch": 0.45,
"grad_norm": 0.1502091884613037,
"learning_rate": 6.022032461472247e-06,
"loss": 0.5722,
"step": 2096
},
{
"epoch": 0.45,
"grad_norm": 0.1553240269422531,
"learning_rate": 6.018616773810483e-06,
"loss": 0.5173,
"step": 2097
},
{
"epoch": 0.45,
"grad_norm": 0.15653330087661743,
"learning_rate": 6.015200590128284e-06,
"loss": 0.5355,
"step": 2098
},
{
"epoch": 0.45,
"grad_norm": 0.1457417756319046,
"learning_rate": 6.011783912089174e-06,
"loss": 0.5205,
"step": 2099
},
{
"epoch": 0.45,
"grad_norm": 0.13138940930366516,
"learning_rate": 6.008366741356926e-06,
"loss": 0.5424,
"step": 2100
},
{
"epoch": 0.45,
"grad_norm": 0.15823757648468018,
"learning_rate": 6.004949079595544e-06,
"loss": 0.5272,
"step": 2101
},
{
"epoch": 0.45,
"grad_norm": 0.17084883153438568,
"learning_rate": 6.001530928469277e-06,
"loss": 0.5291,
"step": 2102
},
{
"epoch": 0.45,
"grad_norm": 0.14622004330158234,
"learning_rate": 5.998112289642608e-06,
"loss": 0.498,
"step": 2103
},
{
"epoch": 0.45,
"grad_norm": 0.1439567655324936,
"learning_rate": 5.9946931647802645e-06,
"loss": 0.5381,
"step": 2104
},
{
"epoch": 0.45,
"grad_norm": 0.23978291451931,
"learning_rate": 5.9912735555472015e-06,
"loss": 0.5141,
"step": 2105
},
{
"epoch": 0.45,
"grad_norm": 0.14025025069713593,
"learning_rate": 5.987853463608618e-06,
"loss": 0.4712,
"step": 2106
},
{
"epoch": 0.45,
"grad_norm": 0.16210734844207764,
"learning_rate": 5.984432890629943e-06,
"loss": 0.5103,
"step": 2107
},
{
"epoch": 0.45,
"grad_norm": 0.17586356401443481,
"learning_rate": 5.981011838276841e-06,
"loss": 0.5507,
"step": 2108
},
{
"epoch": 0.45,
"grad_norm": 0.1554114818572998,
"learning_rate": 5.977590308215211e-06,
"loss": 0.5375,
"step": 2109
},
{
"epoch": 0.45,
"grad_norm": 0.14625568687915802,
"learning_rate": 5.974168302111183e-06,
"loss": 0.5195,
"step": 2110
},
{
"epoch": 0.45,
"grad_norm": 0.1564107984304428,
"learning_rate": 5.970745821631121e-06,
"loss": 0.5006,
"step": 2111
},
{
"epoch": 0.45,
"grad_norm": 0.1529904454946518,
"learning_rate": 5.967322868441616e-06,
"loss": 0.5455,
"step": 2112
},
{
"epoch": 0.46,
"grad_norm": 0.16919173300266266,
"learning_rate": 5.963899444209496e-06,
"loss": 0.5323,
"step": 2113
},
{
"epoch": 0.46,
"grad_norm": 0.2237899899482727,
"learning_rate": 5.9604755506018105e-06,
"loss": 0.5153,
"step": 2114
},
{
"epoch": 0.46,
"grad_norm": 0.17237022519111633,
"learning_rate": 5.957051189285843e-06,
"loss": 0.5237,
"step": 2115
},
{
"epoch": 0.46,
"grad_norm": 0.18111760914325714,
"learning_rate": 5.953626361929102e-06,
"loss": 0.492,
"step": 2116
},
{
"epoch": 0.46,
"grad_norm": 0.13480786979198456,
"learning_rate": 5.950201070199326e-06,
"loss": 0.4827,
"step": 2117
},
{
"epoch": 0.46,
"grad_norm": 0.17693190276622772,
"learning_rate": 5.946775315764476e-06,
"loss": 0.5592,
"step": 2118
},
{
"epoch": 0.46,
"grad_norm": 0.13885067403316498,
"learning_rate": 5.943349100292739e-06,
"loss": 0.497,
"step": 2119
},
{
"epoch": 0.46,
"grad_norm": 0.1679374873638153,
"learning_rate": 5.939922425452531e-06,
"loss": 0.5045,
"step": 2120
},
{
"epoch": 0.46,
"grad_norm": 0.1675940304994583,
"learning_rate": 5.936495292912483e-06,
"loss": 0.5518,
"step": 2121
},
{
"epoch": 0.46,
"grad_norm": 0.16924212872982025,
"learning_rate": 5.93306770434146e-06,
"loss": 0.5481,
"step": 2122
},
{
"epoch": 0.46,
"grad_norm": 0.21032755076885223,
"learning_rate": 5.929639661408538e-06,
"loss": 0.4816,
"step": 2123
},
{
"epoch": 0.46,
"grad_norm": 0.11854084581136703,
"learning_rate": 5.926211165783021e-06,
"loss": 0.5009,
"step": 2124
},
{
"epoch": 0.46,
"grad_norm": 0.13082769513130188,
"learning_rate": 5.922782219134433e-06,
"loss": 0.4822,
"step": 2125
},
{
"epoch": 0.46,
"grad_norm": 0.1662750393152237,
"learning_rate": 5.919352823132515e-06,
"loss": 0.5262,
"step": 2126
},
{
"epoch": 0.46,
"grad_norm": 0.1488747000694275,
"learning_rate": 5.915922979447228e-06,
"loss": 0.5553,
"step": 2127
},
{
"epoch": 0.46,
"grad_norm": 0.1871393918991089,
"learning_rate": 5.912492689748753e-06,
"loss": 0.4965,
"step": 2128
},
{
"epoch": 0.46,
"grad_norm": 0.18025460839271545,
"learning_rate": 5.909061955707486e-06,
"loss": 0.531,
"step": 2129
},
{
"epoch": 0.46,
"grad_norm": 0.1580578088760376,
"learning_rate": 5.905630778994036e-06,
"loss": 0.5089,
"step": 2130
},
{
"epoch": 0.46,
"grad_norm": 0.16995598375797272,
"learning_rate": 5.902199161279236e-06,
"loss": 0.5137,
"step": 2131
},
{
"epoch": 0.46,
"grad_norm": 0.14344586431980133,
"learning_rate": 5.898767104234128e-06,
"loss": 0.5051,
"step": 2132
},
{
"epoch": 0.46,
"grad_norm": 0.1728695183992386,
"learning_rate": 5.895334609529967e-06,
"loss": 0.509,
"step": 2133
},
{
"epoch": 0.46,
"grad_norm": 0.13887768983840942,
"learning_rate": 5.891901678838227e-06,
"loss": 0.4838,
"step": 2134
},
{
"epoch": 0.46,
"grad_norm": 0.18018049001693726,
"learning_rate": 5.8884683138305854e-06,
"loss": 0.5273,
"step": 2135
},
{
"epoch": 0.46,
"grad_norm": 0.15605993568897247,
"learning_rate": 5.88503451617894e-06,
"loss": 0.4847,
"step": 2136
},
{
"epoch": 0.46,
"grad_norm": 0.14139895141124725,
"learning_rate": 5.881600287555393e-06,
"loss": 0.4769,
"step": 2137
},
{
"epoch": 0.46,
"grad_norm": 0.15375615656375885,
"learning_rate": 5.878165629632262e-06,
"loss": 0.5479,
"step": 2138
},
{
"epoch": 0.46,
"grad_norm": 0.16424569487571716,
"learning_rate": 5.874730544082069e-06,
"loss": 0.5337,
"step": 2139
},
{
"epoch": 0.46,
"grad_norm": 0.20334842801094055,
"learning_rate": 5.8712950325775416e-06,
"loss": 0.5627,
"step": 2140
},
{
"epoch": 0.46,
"grad_norm": 0.13510531187057495,
"learning_rate": 5.867859096791626e-06,
"loss": 0.4906,
"step": 2141
},
{
"epoch": 0.46,
"grad_norm": 0.158345028758049,
"learning_rate": 5.864422738397465e-06,
"loss": 0.5136,
"step": 2142
},
{
"epoch": 0.46,
"grad_norm": 0.1618645340204239,
"learning_rate": 5.860985959068408e-06,
"loss": 0.4867,
"step": 2143
},
{
"epoch": 0.46,
"grad_norm": 0.1342993676662445,
"learning_rate": 5.857548760478015e-06,
"loss": 0.5011,
"step": 2144
},
{
"epoch": 0.46,
"grad_norm": 0.14608271420001984,
"learning_rate": 5.8541111443000455e-06,
"loss": 0.4916,
"step": 2145
},
{
"epoch": 0.46,
"grad_norm": 0.1352057158946991,
"learning_rate": 5.85067311220846e-06,
"loss": 0.5195,
"step": 2146
},
{
"epoch": 0.46,
"grad_norm": 0.1447547972202301,
"learning_rate": 5.847234665877432e-06,
"loss": 0.4918,
"step": 2147
},
{
"epoch": 0.46,
"grad_norm": 0.17679902911186218,
"learning_rate": 5.843795806981325e-06,
"loss": 0.5345,
"step": 2148
},
{
"epoch": 0.46,
"grad_norm": 0.1902516484260559,
"learning_rate": 5.840356537194708e-06,
"loss": 0.5343,
"step": 2149
},
{
"epoch": 0.46,
"grad_norm": 0.19225680828094482,
"learning_rate": 5.836916858192353e-06,
"loss": 0.4972,
"step": 2150
},
{
"epoch": 0.46,
"grad_norm": 0.17341876029968262,
"learning_rate": 5.833476771649227e-06,
"loss": 0.5002,
"step": 2151
},
{
"epoch": 0.46,
"grad_norm": 0.149870827794075,
"learning_rate": 5.830036279240497e-06,
"loss": 0.5484,
"step": 2152
},
{
"epoch": 0.46,
"grad_norm": 0.1548566222190857,
"learning_rate": 5.826595382641529e-06,
"loss": 0.5553,
"step": 2153
},
{
"epoch": 0.46,
"grad_norm": 0.16744022071361542,
"learning_rate": 5.823154083527884e-06,
"loss": 0.5416,
"step": 2154
},
{
"epoch": 0.46,
"grad_norm": 0.18139050900936127,
"learning_rate": 5.819712383575316e-06,
"loss": 0.5225,
"step": 2155
},
{
"epoch": 0.46,
"grad_norm": 0.16486258804798126,
"learning_rate": 5.816270284459783e-06,
"loss": 0.4938,
"step": 2156
},
{
"epoch": 0.46,
"grad_norm": 0.15385212004184723,
"learning_rate": 5.812827787857428e-06,
"loss": 0.562,
"step": 2157
},
{
"epoch": 0.46,
"grad_norm": 0.17840281128883362,
"learning_rate": 5.809384895444594e-06,
"loss": 0.487,
"step": 2158
},
{
"epoch": 0.47,
"grad_norm": 0.16368557512760162,
"learning_rate": 5.805941608897814e-06,
"loss": 0.4991,
"step": 2159
},
{
"epoch": 0.47,
"grad_norm": 0.22969526052474976,
"learning_rate": 5.802497929893813e-06,
"loss": 0.4751,
"step": 2160
},
{
"epoch": 0.47,
"grad_norm": 0.21182815730571747,
"learning_rate": 5.799053860109506e-06,
"loss": 0.5603,
"step": 2161
},
{
"epoch": 0.47,
"grad_norm": 0.16508375108242035,
"learning_rate": 5.795609401222001e-06,
"loss": 0.5308,
"step": 2162
},
{
"epoch": 0.47,
"grad_norm": 0.3659750521183014,
"learning_rate": 5.7921645549085955e-06,
"loss": 0.5229,
"step": 2163
},
{
"epoch": 0.47,
"grad_norm": 0.15634752810001373,
"learning_rate": 5.7887193228467735e-06,
"loss": 0.5594,
"step": 2164
},
{
"epoch": 0.47,
"grad_norm": 0.15100319683551788,
"learning_rate": 5.785273706714205e-06,
"loss": 0.5619,
"step": 2165
},
{
"epoch": 0.47,
"grad_norm": 0.13537266850471497,
"learning_rate": 5.781827708188753e-06,
"loss": 0.5224,
"step": 2166
},
{
"epoch": 0.47,
"grad_norm": 0.16945107281208038,
"learning_rate": 5.778381328948461e-06,
"loss": 0.513,
"step": 2167
},
{
"epoch": 0.47,
"grad_norm": 0.1476183384656906,
"learning_rate": 5.774934570671562e-06,
"loss": 0.5124,
"step": 2168
},
{
"epoch": 0.47,
"grad_norm": 0.156847283244133,
"learning_rate": 5.771487435036472e-06,
"loss": 0.5185,
"step": 2169
},
{
"epoch": 0.47,
"grad_norm": 0.24519124627113342,
"learning_rate": 5.768039923721791e-06,
"loss": 0.5001,
"step": 2170
},
{
"epoch": 0.47,
"grad_norm": 0.19340813159942627,
"learning_rate": 5.764592038406298e-06,
"loss": 0.528,
"step": 2171
},
{
"epoch": 0.47,
"grad_norm": 0.16022874414920807,
"learning_rate": 5.761143780768962e-06,
"loss": 0.4961,
"step": 2172
},
{
"epoch": 0.47,
"grad_norm": 0.18600255250930786,
"learning_rate": 5.7576951524889245e-06,
"loss": 0.4908,
"step": 2173
},
{
"epoch": 0.47,
"grad_norm": 0.11501923948526382,
"learning_rate": 5.7542461552455165e-06,
"loss": 0.5403,
"step": 2174
},
{
"epoch": 0.47,
"grad_norm": 0.14986415207386017,
"learning_rate": 5.750796790718243e-06,
"loss": 0.5027,
"step": 2175
},
{
"epoch": 0.47,
"grad_norm": 0.13095037639141083,
"learning_rate": 5.747347060586787e-06,
"loss": 0.5339,
"step": 2176
},
{
"epoch": 0.47,
"grad_norm": 0.12488746643066406,
"learning_rate": 5.743896966531012e-06,
"loss": 0.5256,
"step": 2177
},
{
"epoch": 0.47,
"grad_norm": 0.1328728049993515,
"learning_rate": 5.740446510230959e-06,
"loss": 0.429,
"step": 2178
},
{
"epoch": 0.47,
"grad_norm": 0.13304339349269867,
"learning_rate": 5.736995693366847e-06,
"loss": 0.4621,
"step": 2179
},
{
"epoch": 0.47,
"grad_norm": 0.22455641627311707,
"learning_rate": 5.733544517619064e-06,
"loss": 0.5157,
"step": 2180
},
{
"epoch": 0.47,
"grad_norm": 0.13997776806354523,
"learning_rate": 5.730092984668179e-06,
"loss": 0.4909,
"step": 2181
},
{
"epoch": 0.47,
"grad_norm": 0.1835583746433258,
"learning_rate": 5.726641096194932e-06,
"loss": 0.4697,
"step": 2182
},
{
"epoch": 0.47,
"grad_norm": 0.1669677495956421,
"learning_rate": 5.723188853880238e-06,
"loss": 0.5484,
"step": 2183
},
{
"epoch": 0.47,
"grad_norm": 0.1625543087720871,
"learning_rate": 5.719736259405182e-06,
"loss": 0.4743,
"step": 2184
},
{
"epoch": 0.47,
"grad_norm": 0.15123441815376282,
"learning_rate": 5.716283314451026e-06,
"loss": 0.482,
"step": 2185
},
{
"epoch": 0.47,
"grad_norm": 0.16270317137241364,
"learning_rate": 5.7128300206991925e-06,
"loss": 0.4675,
"step": 2186
},
{
"epoch": 0.47,
"grad_norm": 0.1661555916070938,
"learning_rate": 5.709376379831283e-06,
"loss": 0.5076,
"step": 2187
},
{
"epoch": 0.47,
"grad_norm": 0.16409648954868317,
"learning_rate": 5.705922393529065e-06,
"loss": 0.5271,
"step": 2188
},
{
"epoch": 0.47,
"grad_norm": 0.14545123279094696,
"learning_rate": 5.702468063474473e-06,
"loss": 0.4966,
"step": 2189
},
{
"epoch": 0.47,
"grad_norm": 0.22827212512493134,
"learning_rate": 5.69901339134961e-06,
"loss": 0.4808,
"step": 2190
},
{
"epoch": 0.47,
"grad_norm": 0.1843656748533249,
"learning_rate": 5.695558378836749e-06,
"loss": 0.505,
"step": 2191
},
{
"epoch": 0.47,
"grad_norm": 0.19031104445457458,
"learning_rate": 5.692103027618321e-06,
"loss": 0.5571,
"step": 2192
},
{
"epoch": 0.47,
"grad_norm": 0.16894584894180298,
"learning_rate": 5.688647339376926e-06,
"loss": 0.5266,
"step": 2193
},
{
"epoch": 0.47,
"grad_norm": 0.14823244512081146,
"learning_rate": 5.685191315795331e-06,
"loss": 0.5572,
"step": 2194
},
{
"epoch": 0.47,
"grad_norm": 0.13419359922409058,
"learning_rate": 5.681734958556463e-06,
"loss": 0.5163,
"step": 2195
},
{
"epoch": 0.47,
"grad_norm": 0.18760497868061066,
"learning_rate": 5.678278269343411e-06,
"loss": 0.5218,
"step": 2196
},
{
"epoch": 0.47,
"grad_norm": 0.1401587277650833,
"learning_rate": 5.674821249839428e-06,
"loss": 0.4821,
"step": 2197
},
{
"epoch": 0.47,
"grad_norm": 0.15496966242790222,
"learning_rate": 5.671363901727927e-06,
"loss": 0.504,
"step": 2198
},
{
"epoch": 0.47,
"grad_norm": 0.17478565871715546,
"learning_rate": 5.667906226692479e-06,
"loss": 0.5252,
"step": 2199
},
{
"epoch": 0.47,
"grad_norm": 0.14033323526382446,
"learning_rate": 5.664448226416815e-06,
"loss": 0.5534,
"step": 2200
},
{
"epoch": 0.47,
"grad_norm": 0.23815791308879852,
"learning_rate": 5.660989902584829e-06,
"loss": 0.5357,
"step": 2201
},
{
"epoch": 0.47,
"grad_norm": 0.16176384687423706,
"learning_rate": 5.657531256880565e-06,
"loss": 0.5378,
"step": 2202
},
{
"epoch": 0.47,
"grad_norm": 0.20444779098033905,
"learning_rate": 5.654072290988231e-06,
"loss": 0.5905,
"step": 2203
},
{
"epoch": 0.47,
"grad_norm": 0.14830709993839264,
"learning_rate": 5.650613006592185e-06,
"loss": 0.5192,
"step": 2204
},
{
"epoch": 0.47,
"grad_norm": 0.2211901992559433,
"learning_rate": 5.647153405376942e-06,
"loss": 0.564,
"step": 2205
},
{
"epoch": 0.48,
"grad_norm": 0.15610624849796295,
"learning_rate": 5.643693489027172e-06,
"loss": 0.49,
"step": 2206
},
{
"epoch": 0.48,
"grad_norm": 0.13824397325515747,
"learning_rate": 5.6402332592277e-06,
"loss": 0.519,
"step": 2207
},
{
"epoch": 0.48,
"grad_norm": 0.18318380415439606,
"learning_rate": 5.636772717663501e-06,
"loss": 0.5294,
"step": 2208
},
{
"epoch": 0.48,
"grad_norm": 0.20423349738121033,
"learning_rate": 5.633311866019703e-06,
"loss": 0.5128,
"step": 2209
},
{
"epoch": 0.48,
"grad_norm": 0.14289386570453644,
"learning_rate": 5.629850705981584e-06,
"loss": 0.5008,
"step": 2210
},
{
"epoch": 0.48,
"grad_norm": 0.17370502650737762,
"learning_rate": 5.626389239234572e-06,
"loss": 0.5657,
"step": 2211
},
{
"epoch": 0.48,
"grad_norm": 0.1700432002544403,
"learning_rate": 5.622927467464247e-06,
"loss": 0.5137,
"step": 2212
},
{
"epoch": 0.48,
"grad_norm": 0.1566981077194214,
"learning_rate": 5.619465392356335e-06,
"loss": 0.5698,
"step": 2213
},
{
"epoch": 0.48,
"grad_norm": 0.166670560836792,
"learning_rate": 5.6160030155967116e-06,
"loss": 0.5272,
"step": 2214
},
{
"epoch": 0.48,
"grad_norm": 0.14587420225143433,
"learning_rate": 5.612540338871395e-06,
"loss": 0.5049,
"step": 2215
},
{
"epoch": 0.48,
"grad_norm": 0.14537444710731506,
"learning_rate": 5.609077363866555e-06,
"loss": 0.523,
"step": 2216
},
{
"epoch": 0.48,
"grad_norm": 0.15122370421886444,
"learning_rate": 5.605614092268506e-06,
"loss": 0.5304,
"step": 2217
},
{
"epoch": 0.48,
"grad_norm": 0.11322161555290222,
"learning_rate": 5.602150525763701e-06,
"loss": 0.5269,
"step": 2218
},
{
"epoch": 0.48,
"grad_norm": 0.1510639786720276,
"learning_rate": 5.598686666038745e-06,
"loss": 0.5668,
"step": 2219
},
{
"epoch": 0.48,
"grad_norm": 0.16219152510166168,
"learning_rate": 5.595222514780379e-06,
"loss": 0.5016,
"step": 2220
},
{
"epoch": 0.48,
"grad_norm": 0.14243803918361664,
"learning_rate": 5.591758073675485e-06,
"loss": 0.5398,
"step": 2221
},
{
"epoch": 0.48,
"grad_norm": 0.16937606036663055,
"learning_rate": 5.588293344411097e-06,
"loss": 0.5621,
"step": 2222
},
{
"epoch": 0.48,
"grad_norm": 0.15524210035800934,
"learning_rate": 5.5848283286743786e-06,
"loss": 0.5695,
"step": 2223
},
{
"epoch": 0.48,
"grad_norm": 0.1765149086713791,
"learning_rate": 5.581363028152633e-06,
"loss": 0.5126,
"step": 2224
},
{
"epoch": 0.48,
"grad_norm": 0.2328573763370514,
"learning_rate": 5.5778974445333115e-06,
"loss": 0.5701,
"step": 2225
},
{
"epoch": 0.48,
"grad_norm": 0.1344151794910431,
"learning_rate": 5.574431579503991e-06,
"loss": 0.5512,
"step": 2226
},
{
"epoch": 0.48,
"grad_norm": 0.14871002733707428,
"learning_rate": 5.570965434752396e-06,
"loss": 0.5196,
"step": 2227
},
{
"epoch": 0.48,
"grad_norm": 0.19491346180438995,
"learning_rate": 5.5674990119663794e-06,
"loss": 0.5809,
"step": 2228
},
{
"epoch": 0.48,
"grad_norm": 0.15575414896011353,
"learning_rate": 5.564032312833936e-06,
"loss": 0.5395,
"step": 2229
},
{
"epoch": 0.48,
"grad_norm": 0.25920212268829346,
"learning_rate": 5.560565339043188e-06,
"loss": 0.4677,
"step": 2230
},
{
"epoch": 0.48,
"grad_norm": 0.1457945555448532,
"learning_rate": 5.557098092282399e-06,
"loss": 0.5326,
"step": 2231
},
{
"epoch": 0.48,
"grad_norm": 0.13234636187553406,
"learning_rate": 5.55363057423996e-06,
"loss": 0.4859,
"step": 2232
},
{
"epoch": 0.48,
"grad_norm": 0.146928608417511,
"learning_rate": 5.550162786604397e-06,
"loss": 0.5834,
"step": 2233
},
{
"epoch": 0.48,
"grad_norm": 0.13184037804603577,
"learning_rate": 5.546694731064364e-06,
"loss": 0.5236,
"step": 2234
},
{
"epoch": 0.48,
"grad_norm": 0.2852530777454376,
"learning_rate": 5.5432264093086505e-06,
"loss": 0.5034,
"step": 2235
},
{
"epoch": 0.48,
"grad_norm": 0.15083038806915283,
"learning_rate": 5.5397578230261715e-06,
"loss": 0.5118,
"step": 2236
},
{
"epoch": 0.48,
"grad_norm": 0.1430756151676178,
"learning_rate": 5.536288973905971e-06,
"loss": 0.5202,
"step": 2237
},
{
"epoch": 0.48,
"grad_norm": 0.16797691583633423,
"learning_rate": 5.532819863637223e-06,
"loss": 0.5105,
"step": 2238
},
{
"epoch": 0.48,
"grad_norm": 0.15367530286312103,
"learning_rate": 5.529350493909229e-06,
"loss": 0.5178,
"step": 2239
},
{
"epoch": 0.48,
"grad_norm": 0.13238172233104706,
"learning_rate": 5.525880866411414e-06,
"loss": 0.5376,
"step": 2240
},
{
"epoch": 0.48,
"grad_norm": 0.17009180784225464,
"learning_rate": 5.522410982833331e-06,
"loss": 0.5508,
"step": 2241
},
{
"epoch": 0.48,
"grad_norm": 0.1846666783094406,
"learning_rate": 5.5189408448646565e-06,
"loss": 0.5625,
"step": 2242
},
{
"epoch": 0.48,
"grad_norm": 0.18193793296813965,
"learning_rate": 5.515470454195188e-06,
"loss": 0.4663,
"step": 2243
},
{
"epoch": 0.48,
"grad_norm": 0.15874691307544708,
"learning_rate": 5.511999812514857e-06,
"loss": 0.5035,
"step": 2244
},
{
"epoch": 0.48,
"grad_norm": 0.17099712789058685,
"learning_rate": 5.5085289215137035e-06,
"loss": 0.5301,
"step": 2245
},
{
"epoch": 0.48,
"grad_norm": 0.14446376264095306,
"learning_rate": 5.505057782881896e-06,
"loss": 0.4915,
"step": 2246
},
{
"epoch": 0.48,
"grad_norm": 0.3006593883037567,
"learning_rate": 5.501586398309724e-06,
"loss": 0.5032,
"step": 2247
},
{
"epoch": 0.48,
"grad_norm": 0.175115704536438,
"learning_rate": 5.4981147694875924e-06,
"loss": 0.5242,
"step": 2248
},
{
"epoch": 0.48,
"grad_norm": 0.14558811485767365,
"learning_rate": 5.494642898106029e-06,
"loss": 0.4991,
"step": 2249
},
{
"epoch": 0.48,
"grad_norm": 0.1611151546239853,
"learning_rate": 5.491170785855681e-06,
"loss": 0.5272,
"step": 2250
},
{
"epoch": 0.48,
"grad_norm": 0.15863467752933502,
"learning_rate": 5.4876984344273095e-06,
"loss": 0.5034,
"step": 2251
},
{
"epoch": 0.49,
"grad_norm": 0.1683708131313324,
"learning_rate": 5.484225845511791e-06,
"loss": 0.4884,
"step": 2252
},
{
"epoch": 0.49,
"grad_norm": 0.1344245821237564,
"learning_rate": 5.480753020800121e-06,
"loss": 0.5165,
"step": 2253
},
{
"epoch": 0.49,
"grad_norm": 0.1735605001449585,
"learning_rate": 5.477279961983408e-06,
"loss": 0.5519,
"step": 2254
},
{
"epoch": 0.49,
"grad_norm": 0.14727462828159332,
"learning_rate": 5.473806670752877e-06,
"loss": 0.4778,
"step": 2255
},
{
"epoch": 0.49,
"grad_norm": 0.1414579451084137,
"learning_rate": 5.470333148799862e-06,
"loss": 0.4707,
"step": 2256
},
{
"epoch": 0.49,
"grad_norm": 0.1338963657617569,
"learning_rate": 5.466859397815812e-06,
"loss": 0.5236,
"step": 2257
},
{
"epoch": 0.49,
"grad_norm": 0.1523580551147461,
"learning_rate": 5.463385419492288e-06,
"loss": 0.516,
"step": 2258
},
{
"epoch": 0.49,
"grad_norm": 0.17260035872459412,
"learning_rate": 5.459911215520959e-06,
"loss": 0.5188,
"step": 2259
},
{
"epoch": 0.49,
"grad_norm": 0.19136221706867218,
"learning_rate": 5.456436787593609e-06,
"loss": 0.4909,
"step": 2260
},
{
"epoch": 0.49,
"grad_norm": 0.17576466500759125,
"learning_rate": 5.452962137402125e-06,
"loss": 0.5374,
"step": 2261
},
{
"epoch": 0.49,
"grad_norm": 0.18410582840442657,
"learning_rate": 5.449487266638504e-06,
"loss": 0.5541,
"step": 2262
},
{
"epoch": 0.49,
"grad_norm": 0.15502192080020905,
"learning_rate": 5.446012176994854e-06,
"loss": 0.5411,
"step": 2263
},
{
"epoch": 0.49,
"grad_norm": 0.21357733011245728,
"learning_rate": 5.442536870163386e-06,
"loss": 0.5284,
"step": 2264
},
{
"epoch": 0.49,
"grad_norm": 0.15364959836006165,
"learning_rate": 5.439061347836416e-06,
"loss": 0.4631,
"step": 2265
},
{
"epoch": 0.49,
"grad_norm": 0.14856620132923126,
"learning_rate": 5.43558561170637e-06,
"loss": 0.5164,
"step": 2266
},
{
"epoch": 0.49,
"grad_norm": 0.13780789077281952,
"learning_rate": 5.432109663465773e-06,
"loss": 0.5108,
"step": 2267
},
{
"epoch": 0.49,
"grad_norm": 0.13712283968925476,
"learning_rate": 5.428633504807253e-06,
"loss": 0.4914,
"step": 2268
},
{
"epoch": 0.49,
"grad_norm": 0.1509259045124054,
"learning_rate": 5.425157137423548e-06,
"loss": 0.5178,
"step": 2269
},
{
"epoch": 0.49,
"grad_norm": 0.16157595813274384,
"learning_rate": 5.421680563007486e-06,
"loss": 0.5337,
"step": 2270
},
{
"epoch": 0.49,
"grad_norm": 0.17313942313194275,
"learning_rate": 5.418203783252005e-06,
"loss": 0.512,
"step": 2271
},
{
"epoch": 0.49,
"grad_norm": 0.1417136937379837,
"learning_rate": 5.414726799850141e-06,
"loss": 0.5123,
"step": 2272
},
{
"epoch": 0.49,
"grad_norm": 0.15452702343463898,
"learning_rate": 5.411249614495027e-06,
"loss": 0.5249,
"step": 2273
},
{
"epoch": 0.49,
"grad_norm": 0.17498227953910828,
"learning_rate": 5.407772228879894e-06,
"loss": 0.5008,
"step": 2274
},
{
"epoch": 0.49,
"grad_norm": 0.2232121229171753,
"learning_rate": 5.404294644698073e-06,
"loss": 0.5113,
"step": 2275
},
{
"epoch": 0.49,
"grad_norm": 0.11952576041221619,
"learning_rate": 5.400816863642991e-06,
"loss": 0.5147,
"step": 2276
},
{
"epoch": 0.49,
"grad_norm": 0.15340656042099,
"learning_rate": 5.397338887408171e-06,
"loss": 0.478,
"step": 2277
},
{
"epoch": 0.49,
"grad_norm": 0.1494847536087036,
"learning_rate": 5.393860717687231e-06,
"loss": 0.5173,
"step": 2278
},
{
"epoch": 0.49,
"grad_norm": 0.16914784908294678,
"learning_rate": 5.390382356173881e-06,
"loss": 0.4979,
"step": 2279
},
{
"epoch": 0.49,
"grad_norm": 0.10972032696008682,
"learning_rate": 5.3869038045619275e-06,
"loss": 0.5214,
"step": 2280
},
{
"epoch": 0.49,
"grad_norm": 0.1643581986427307,
"learning_rate": 5.383425064545267e-06,
"loss": 0.535,
"step": 2281
},
{
"epoch": 0.49,
"grad_norm": 0.1384391486644745,
"learning_rate": 5.379946137817891e-06,
"loss": 0.5034,
"step": 2282
},
{
"epoch": 0.49,
"grad_norm": 0.1642947793006897,
"learning_rate": 5.376467026073878e-06,
"loss": 0.5549,
"step": 2283
},
{
"epoch": 0.49,
"grad_norm": 0.15689925849437714,
"learning_rate": 5.3729877310073985e-06,
"loss": 0.5086,
"step": 2284
},
{
"epoch": 0.49,
"grad_norm": 0.17627274990081787,
"learning_rate": 5.369508254312715e-06,
"loss": 0.5223,
"step": 2285
},
{
"epoch": 0.49,
"grad_norm": 0.12727420032024384,
"learning_rate": 5.366028597684173e-06,
"loss": 0.5149,
"step": 2286
},
{
"epoch": 0.49,
"grad_norm": 0.15203452110290527,
"learning_rate": 5.362548762816209e-06,
"loss": 0.5713,
"step": 2287
},
{
"epoch": 0.49,
"grad_norm": 0.13790200650691986,
"learning_rate": 5.359068751403347e-06,
"loss": 0.545,
"step": 2288
},
{
"epoch": 0.49,
"grad_norm": 0.13259437680244446,
"learning_rate": 5.355588565140195e-06,
"loss": 0.4586,
"step": 2289
},
{
"epoch": 0.49,
"grad_norm": 0.1421840488910675,
"learning_rate": 5.352108205721445e-06,
"loss": 0.4915,
"step": 2290
},
{
"epoch": 0.49,
"grad_norm": 0.14462217688560486,
"learning_rate": 5.348627674841876e-06,
"loss": 0.4412,
"step": 2291
},
{
"epoch": 0.49,
"grad_norm": 0.15902197360992432,
"learning_rate": 5.345146974196351e-06,
"loss": 0.5418,
"step": 2292
},
{
"epoch": 0.49,
"grad_norm": 0.1560838520526886,
"learning_rate": 5.341666105479812e-06,
"loss": 0.4639,
"step": 2293
},
{
"epoch": 0.49,
"grad_norm": 0.15082865953445435,
"learning_rate": 5.338185070387289e-06,
"loss": 0.501,
"step": 2294
},
{
"epoch": 0.49,
"grad_norm": 0.1447245180606842,
"learning_rate": 5.334703870613887e-06,
"loss": 0.4603,
"step": 2295
},
{
"epoch": 0.49,
"grad_norm": 0.23148810863494873,
"learning_rate": 5.3312225078547895e-06,
"loss": 0.5145,
"step": 2296
},
{
"epoch": 0.49,
"grad_norm": 0.1934991329908371,
"learning_rate": 5.327740983805267e-06,
"loss": 0.5137,
"step": 2297
},
{
"epoch": 0.5,
"grad_norm": 0.18782839179039001,
"learning_rate": 5.324259300160667e-06,
"loss": 0.5348,
"step": 2298
},
{
"epoch": 0.5,
"grad_norm": 0.17964793741703033,
"learning_rate": 5.320777458616407e-06,
"loss": 0.4938,
"step": 2299
},
{
"epoch": 0.5,
"grad_norm": 0.1606227159500122,
"learning_rate": 5.31729546086799e-06,
"loss": 0.5483,
"step": 2300
},
{
"epoch": 0.5,
"grad_norm": 0.16519147157669067,
"learning_rate": 5.313813308610993e-06,
"loss": 0.5018,
"step": 2301
},
{
"epoch": 0.5,
"grad_norm": 0.1705171763896942,
"learning_rate": 5.310331003541065e-06,
"loss": 0.4838,
"step": 2302
},
{
"epoch": 0.5,
"grad_norm": 0.22581948339939117,
"learning_rate": 5.30684854735393e-06,
"loss": 0.5207,
"step": 2303
},
{
"epoch": 0.5,
"grad_norm": 0.16089698672294617,
"learning_rate": 5.303365941745392e-06,
"loss": 0.5237,
"step": 2304
},
{
"epoch": 0.5,
"grad_norm": 0.15881328284740448,
"learning_rate": 5.299883188411318e-06,
"loss": 0.477,
"step": 2305
},
{
"epoch": 0.5,
"grad_norm": 0.21279747784137726,
"learning_rate": 5.296400289047655e-06,
"loss": 0.5183,
"step": 2306
},
{
"epoch": 0.5,
"grad_norm": 0.16910669207572937,
"learning_rate": 5.292917245350417e-06,
"loss": 0.4759,
"step": 2307
},
{
"epoch": 0.5,
"grad_norm": 0.16905193030834198,
"learning_rate": 5.289434059015689e-06,
"loss": 0.5334,
"step": 2308
},
{
"epoch": 0.5,
"grad_norm": 0.11337817460298538,
"learning_rate": 5.285950731739624e-06,
"loss": 0.4597,
"step": 2309
},
{
"epoch": 0.5,
"grad_norm": 0.20089925825595856,
"learning_rate": 5.28246726521845e-06,
"loss": 0.5143,
"step": 2310
},
{
"epoch": 0.5,
"grad_norm": 0.152847558259964,
"learning_rate": 5.278983661148453e-06,
"loss": 0.5067,
"step": 2311
},
{
"epoch": 0.5,
"grad_norm": 0.16231143474578857,
"learning_rate": 5.275499921225994e-06,
"loss": 0.4883,
"step": 2312
},
{
"epoch": 0.5,
"grad_norm": 0.13849905133247375,
"learning_rate": 5.2720160471474955e-06,
"loss": 0.5279,
"step": 2313
},
{
"epoch": 0.5,
"grad_norm": 0.2002251148223877,
"learning_rate": 5.26853204060945e-06,
"loss": 0.5652,
"step": 2314
},
{
"epoch": 0.5,
"grad_norm": 0.14642587304115295,
"learning_rate": 5.2650479033084075e-06,
"loss": 0.4926,
"step": 2315
},
{
"epoch": 0.5,
"grad_norm": 0.19536569714546204,
"learning_rate": 5.26156363694099e-06,
"loss": 0.5673,
"step": 2316
},
{
"epoch": 0.5,
"grad_norm": 0.16617797315120697,
"learning_rate": 5.258079243203875e-06,
"loss": 0.5427,
"step": 2317
},
{
"epoch": 0.5,
"grad_norm": 0.11626624315977097,
"learning_rate": 5.2545947237938055e-06,
"loss": 0.5398,
"step": 2318
},
{
"epoch": 0.5,
"grad_norm": 0.17686258256435394,
"learning_rate": 5.251110080407587e-06,
"loss": 0.5253,
"step": 2319
},
{
"epoch": 0.5,
"grad_norm": 0.1972484439611435,
"learning_rate": 5.247625314742083e-06,
"loss": 0.4815,
"step": 2320
},
{
"epoch": 0.5,
"grad_norm": 0.14836078882217407,
"learning_rate": 5.244140428494216e-06,
"loss": 0.5806,
"step": 2321
},
{
"epoch": 0.5,
"grad_norm": 0.22560933232307434,
"learning_rate": 5.240655423360969e-06,
"loss": 0.5267,
"step": 2322
},
{
"epoch": 0.5,
"grad_norm": 0.19489476084709167,
"learning_rate": 5.237170301039385e-06,
"loss": 0.5376,
"step": 2323
},
{
"epoch": 0.5,
"grad_norm": 0.1505575180053711,
"learning_rate": 5.233685063226557e-06,
"loss": 0.5049,
"step": 2324
},
{
"epoch": 0.5,
"grad_norm": 0.1474577635526657,
"learning_rate": 5.23019971161964e-06,
"loss": 0.5244,
"step": 2325
},
{
"epoch": 0.5,
"grad_norm": 0.15484069287776947,
"learning_rate": 5.226714247915846e-06,
"loss": 0.5052,
"step": 2326
},
{
"epoch": 0.5,
"grad_norm": 0.1554277092218399,
"learning_rate": 5.2232286738124346e-06,
"loss": 0.557,
"step": 2327
},
{
"epoch": 0.5,
"grad_norm": 0.16746380925178528,
"learning_rate": 5.219742991006728e-06,
"loss": 0.5164,
"step": 2328
},
{
"epoch": 0.5,
"grad_norm": 0.19356447458267212,
"learning_rate": 5.216257201196091e-06,
"loss": 0.5051,
"step": 2329
},
{
"epoch": 0.5,
"grad_norm": 0.19989141821861267,
"learning_rate": 5.212771306077951e-06,
"loss": 0.545,
"step": 2330
},
{
"epoch": 0.5,
"grad_norm": 0.14954493939876556,
"learning_rate": 5.209285307349776e-06,
"loss": 0.4857,
"step": 2331
},
{
"epoch": 0.5,
"grad_norm": 0.1772209256887436,
"learning_rate": 5.205799206709097e-06,
"loss": 0.4962,
"step": 2332
},
{
"epoch": 0.5,
"grad_norm": 0.18169115483760834,
"learning_rate": 5.202313005853483e-06,
"loss": 0.5147,
"step": 2333
},
{
"epoch": 0.5,
"grad_norm": 0.1574869155883789,
"learning_rate": 5.198826706480558e-06,
"loss": 0.5343,
"step": 2334
},
{
"epoch": 0.5,
"grad_norm": 0.1543438583612442,
"learning_rate": 5.195340310287993e-06,
"loss": 0.4861,
"step": 2335
},
{
"epoch": 0.5,
"grad_norm": 0.16991272568702698,
"learning_rate": 5.191853818973505e-06,
"loss": 0.5657,
"step": 2336
},
{
"epoch": 0.5,
"grad_norm": 0.198355033993721,
"learning_rate": 5.188367234234859e-06,
"loss": 0.551,
"step": 2337
},
{
"epoch": 0.5,
"grad_norm": 0.1566164791584015,
"learning_rate": 5.184880557769865e-06,
"loss": 0.5248,
"step": 2338
},
{
"epoch": 0.5,
"grad_norm": 0.1619618833065033,
"learning_rate": 5.181393791276374e-06,
"loss": 0.4884,
"step": 2339
},
{
"epoch": 0.5,
"grad_norm": 0.1328553855419159,
"learning_rate": 5.177906936452287e-06,
"loss": 0.5129,
"step": 2340
},
{
"epoch": 0.5,
"grad_norm": 0.1531621217727661,
"learning_rate": 5.174419994995545e-06,
"loss": 0.4932,
"step": 2341
},
{
"epoch": 0.5,
"grad_norm": 0.20409497618675232,
"learning_rate": 5.170932968604131e-06,
"loss": 0.5065,
"step": 2342
},
{
"epoch": 0.5,
"grad_norm": 0.14799822866916656,
"learning_rate": 5.167445858976068e-06,
"loss": 0.5578,
"step": 2343
},
{
"epoch": 0.5,
"grad_norm": 0.1554175615310669,
"learning_rate": 5.163958667809422e-06,
"loss": 0.514,
"step": 2344
},
{
"epoch": 0.51,
"grad_norm": 0.19117942452430725,
"learning_rate": 5.1604713968023e-06,
"loss": 0.5341,
"step": 2345
},
{
"epoch": 0.51,
"grad_norm": 0.15868812799453735,
"learning_rate": 5.156984047652841e-06,
"loss": 0.5528,
"step": 2346
},
{
"epoch": 0.51,
"grad_norm": 0.13103894889354706,
"learning_rate": 5.153496622059232e-06,
"loss": 0.4764,
"step": 2347
},
{
"epoch": 0.51,
"grad_norm": 0.1614736169576645,
"learning_rate": 5.15000912171969e-06,
"loss": 0.5218,
"step": 2348
},
{
"epoch": 0.51,
"grad_norm": 0.1403590589761734,
"learning_rate": 5.1465215483324685e-06,
"loss": 0.493,
"step": 2349
},
{
"epoch": 0.51,
"grad_norm": 0.13807451725006104,
"learning_rate": 5.143033903595862e-06,
"loss": 0.502,
"step": 2350
},
{
"epoch": 0.51,
"grad_norm": 0.1550104022026062,
"learning_rate": 5.1395461892081925e-06,
"loss": 0.541,
"step": 2351
},
{
"epoch": 0.51,
"grad_norm": 0.18088415265083313,
"learning_rate": 5.1360584068678225e-06,
"loss": 0.4898,
"step": 2352
},
{
"epoch": 0.51,
"grad_norm": 0.1560092568397522,
"learning_rate": 5.132570558273143e-06,
"loss": 0.4938,
"step": 2353
},
{
"epoch": 0.51,
"grad_norm": 0.21202325820922852,
"learning_rate": 5.129082645122579e-06,
"loss": 0.5163,
"step": 2354
},
{
"epoch": 0.51,
"grad_norm": 0.1860700100660324,
"learning_rate": 5.125594669114589e-06,
"loss": 0.528,
"step": 2355
},
{
"epoch": 0.51,
"grad_norm": 0.17803077399730682,
"learning_rate": 5.1221066319476576e-06,
"loss": 0.5005,
"step": 2356
},
{
"epoch": 0.51,
"grad_norm": 0.13310760259628296,
"learning_rate": 5.118618535320303e-06,
"loss": 0.5061,
"step": 2357
},
{
"epoch": 0.51,
"grad_norm": 0.14596043527126312,
"learning_rate": 5.115130380931071e-06,
"loss": 0.5381,
"step": 2358
},
{
"epoch": 0.51,
"grad_norm": 0.1787167489528656,
"learning_rate": 5.111642170478534e-06,
"loss": 0.4973,
"step": 2359
},
{
"epoch": 0.51,
"grad_norm": 0.1591702401638031,
"learning_rate": 5.108153905661296e-06,
"loss": 0.5501,
"step": 2360
},
{
"epoch": 0.51,
"grad_norm": 0.15234871208667755,
"learning_rate": 5.1046655881779825e-06,
"loss": 0.5135,
"step": 2361
},
{
"epoch": 0.51,
"grad_norm": 0.19040155410766602,
"learning_rate": 5.101177219727245e-06,
"loss": 0.5693,
"step": 2362
},
{
"epoch": 0.51,
"grad_norm": 0.15070025622844696,
"learning_rate": 5.097688802007767e-06,
"loss": 0.5232,
"step": 2363
},
{
"epoch": 0.51,
"grad_norm": 0.15969093143939972,
"learning_rate": 5.094200336718246e-06,
"loss": 0.5405,
"step": 2364
},
{
"epoch": 0.51,
"grad_norm": 0.12944184243679047,
"learning_rate": 5.090711825557408e-06,
"loss": 0.491,
"step": 2365
},
{
"epoch": 0.51,
"grad_norm": 0.1388048529624939,
"learning_rate": 5.087223270224003e-06,
"loss": 0.5004,
"step": 2366
},
{
"epoch": 0.51,
"grad_norm": 0.18210247159004211,
"learning_rate": 5.083734672416797e-06,
"loss": 0.4767,
"step": 2367
},
{
"epoch": 0.51,
"grad_norm": 0.1709405779838562,
"learning_rate": 5.080246033834581e-06,
"loss": 0.5355,
"step": 2368
},
{
"epoch": 0.51,
"grad_norm": 0.16608983278274536,
"learning_rate": 5.076757356176168e-06,
"loss": 0.5589,
"step": 2369
},
{
"epoch": 0.51,
"grad_norm": 0.18925471603870392,
"learning_rate": 5.0732686411403816e-06,
"loss": 0.5443,
"step": 2370
},
{
"epoch": 0.51,
"grad_norm": 0.17456351220607758,
"learning_rate": 5.069779890426072e-06,
"loss": 0.4903,
"step": 2371
},
{
"epoch": 0.51,
"grad_norm": 0.14656615257263184,
"learning_rate": 5.066291105732102e-06,
"loss": 0.4646,
"step": 2372
},
{
"epoch": 0.51,
"grad_norm": 0.14051038026809692,
"learning_rate": 5.0628022887573515e-06,
"loss": 0.5032,
"step": 2373
},
{
"epoch": 0.51,
"grad_norm": 0.15590442717075348,
"learning_rate": 5.05931344120072e-06,
"loss": 0.5255,
"step": 2374
},
{
"epoch": 0.51,
"grad_norm": 0.15599004924297333,
"learning_rate": 5.0558245647611155e-06,
"loss": 0.5418,
"step": 2375
},
{
"epoch": 0.51,
"grad_norm": 0.1530722975730896,
"learning_rate": 5.052335661137467e-06,
"loss": 0.469,
"step": 2376
},
{
"epoch": 0.51,
"grad_norm": 0.16184838116168976,
"learning_rate": 5.0488467320287106e-06,
"loss": 0.4887,
"step": 2377
},
{
"epoch": 0.51,
"grad_norm": 0.12114948034286499,
"learning_rate": 5.0453577791337984e-06,
"loss": 0.4982,
"step": 2378
},
{
"epoch": 0.51,
"grad_norm": 0.14550864696502686,
"learning_rate": 5.041868804151694e-06,
"loss": 0.4555,
"step": 2379
},
{
"epoch": 0.51,
"grad_norm": 0.1462576687335968,
"learning_rate": 5.03837980878137e-06,
"loss": 0.5135,
"step": 2380
},
{
"epoch": 0.51,
"grad_norm": 0.1352759301662445,
"learning_rate": 5.0348907947218086e-06,
"loss": 0.5367,
"step": 2381
},
{
"epoch": 0.51,
"grad_norm": 0.18618960678577423,
"learning_rate": 5.031401763672003e-06,
"loss": 0.4918,
"step": 2382
},
{
"epoch": 0.51,
"grad_norm": 0.1655811071395874,
"learning_rate": 5.027912717330956e-06,
"loss": 0.5077,
"step": 2383
},
{
"epoch": 0.51,
"grad_norm": 0.14371387660503387,
"learning_rate": 5.024423657397674e-06,
"loss": 0.5463,
"step": 2384
},
{
"epoch": 0.51,
"grad_norm": 0.1331823766231537,
"learning_rate": 5.020934585571171e-06,
"loss": 0.5586,
"step": 2385
},
{
"epoch": 0.51,
"grad_norm": 0.16544833779335022,
"learning_rate": 5.017445503550471e-06,
"loss": 0.5493,
"step": 2386
},
{
"epoch": 0.51,
"grad_norm": 0.16902866959571838,
"learning_rate": 5.013956413034595e-06,
"loss": 0.5215,
"step": 2387
},
{
"epoch": 0.51,
"grad_norm": 0.19423706829547882,
"learning_rate": 5.010467315722578e-06,
"loss": 0.5343,
"step": 2388
},
{
"epoch": 0.51,
"grad_norm": 0.1521768569946289,
"learning_rate": 5.006978213313448e-06,
"loss": 0.5021,
"step": 2389
},
{
"epoch": 0.51,
"grad_norm": 0.12153864651918411,
"learning_rate": 5.003489107506243e-06,
"loss": 0.4893,
"step": 2390
},
{
"epoch": 0.52,
"grad_norm": 0.1757657527923584,
"learning_rate": 5e-06,
"loss": 0.535,
"step": 2391
},
{
"epoch": 0.52,
"grad_norm": 0.17673848569393158,
"learning_rate": 4.996510892493758e-06,
"loss": 0.5201,
"step": 2392
},
{
"epoch": 0.52,
"grad_norm": 0.17887622117996216,
"learning_rate": 4.993021786686554e-06,
"loss": 0.5413,
"step": 2393
},
{
"epoch": 0.52,
"grad_norm": 0.1362655609846115,
"learning_rate": 4.989532684277424e-06,
"loss": 0.4757,
"step": 2394
},
{
"epoch": 0.52,
"grad_norm": 0.21385332942008972,
"learning_rate": 4.986043586965406e-06,
"loss": 0.5233,
"step": 2395
},
{
"epoch": 0.52,
"grad_norm": 0.16764004528522491,
"learning_rate": 4.98255449644953e-06,
"loss": 0.5193,
"step": 2396
},
{
"epoch": 0.52,
"grad_norm": 0.12933380901813507,
"learning_rate": 4.979065414428829e-06,
"loss": 0.4681,
"step": 2397
},
{
"epoch": 0.52,
"grad_norm": 0.17438261210918427,
"learning_rate": 4.975576342602329e-06,
"loss": 0.5437,
"step": 2398
},
{
"epoch": 0.52,
"grad_norm": 0.1581277847290039,
"learning_rate": 4.9720872826690455e-06,
"loss": 0.5147,
"step": 2399
},
{
"epoch": 0.52,
"grad_norm": 0.15126928687095642,
"learning_rate": 4.968598236327998e-06,
"loss": 0.5033,
"step": 2400
},
{
"epoch": 0.52,
"grad_norm": 0.144017294049263,
"learning_rate": 4.965109205278193e-06,
"loss": 0.4557,
"step": 2401
},
{
"epoch": 0.52,
"grad_norm": 0.158042773604393,
"learning_rate": 4.961620191218632e-06,
"loss": 0.5118,
"step": 2402
},
{
"epoch": 0.52,
"grad_norm": 0.21210241317749023,
"learning_rate": 4.9581311958483075e-06,
"loss": 0.556,
"step": 2403
},
{
"epoch": 0.52,
"grad_norm": 0.25010186433792114,
"learning_rate": 4.954642220866202e-06,
"loss": 0.542,
"step": 2404
},
{
"epoch": 0.52,
"grad_norm": 0.21155287325382233,
"learning_rate": 4.95115326797129e-06,
"loss": 0.4882,
"step": 2405
},
{
"epoch": 0.52,
"grad_norm": 0.17543160915374756,
"learning_rate": 4.947664338862534e-06,
"loss": 0.5883,
"step": 2406
},
{
"epoch": 0.52,
"grad_norm": 0.1969243884086609,
"learning_rate": 4.944175435238886e-06,
"loss": 0.5051,
"step": 2407
},
{
"epoch": 0.52,
"grad_norm": 0.1602378487586975,
"learning_rate": 4.940686558799283e-06,
"loss": 0.5139,
"step": 2408
},
{
"epoch": 0.52,
"grad_norm": 0.1955273449420929,
"learning_rate": 4.9371977112426485e-06,
"loss": 0.5096,
"step": 2409
},
{
"epoch": 0.52,
"grad_norm": 0.15544743835926056,
"learning_rate": 4.933708894267901e-06,
"loss": 0.5081,
"step": 2410
},
{
"epoch": 0.52,
"grad_norm": 0.177435964345932,
"learning_rate": 4.93022010957393e-06,
"loss": 0.5476,
"step": 2411
},
{
"epoch": 0.52,
"grad_norm": 0.14814496040344238,
"learning_rate": 4.92673135885962e-06,
"loss": 0.5653,
"step": 2412
},
{
"epoch": 0.52,
"grad_norm": 0.144011989235878,
"learning_rate": 4.923242643823834e-06,
"loss": 0.6024,
"step": 2413
},
{
"epoch": 0.52,
"grad_norm": 0.16023319959640503,
"learning_rate": 4.919753966165419e-06,
"loss": 0.5927,
"step": 2414
},
{
"epoch": 0.52,
"grad_norm": 0.13633988797664642,
"learning_rate": 4.916265327583204e-06,
"loss": 0.5548,
"step": 2415
},
{
"epoch": 0.52,
"grad_norm": 0.20109489560127258,
"learning_rate": 4.912776729775999e-06,
"loss": 0.5668,
"step": 2416
},
{
"epoch": 0.52,
"grad_norm": 0.3419434428215027,
"learning_rate": 4.9092881744425944e-06,
"loss": 0.4842,
"step": 2417
},
{
"epoch": 0.52,
"grad_norm": 0.1448926031589508,
"learning_rate": 4.905799663281756e-06,
"loss": 0.4886,
"step": 2418
},
{
"epoch": 0.52,
"grad_norm": 0.2309703230857849,
"learning_rate": 4.902311197992234e-06,
"loss": 0.5237,
"step": 2419
},
{
"epoch": 0.52,
"grad_norm": 0.14190784096717834,
"learning_rate": 4.898822780272757e-06,
"loss": 0.5218,
"step": 2420
},
{
"epoch": 0.52,
"grad_norm": 0.18205609917640686,
"learning_rate": 4.895334411822019e-06,
"loss": 0.5251,
"step": 2421
},
{
"epoch": 0.52,
"grad_norm": 0.13188670575618744,
"learning_rate": 4.8918460943387065e-06,
"loss": 0.4971,
"step": 2422
},
{
"epoch": 0.52,
"grad_norm": 0.16982056200504303,
"learning_rate": 4.888357829521466e-06,
"loss": 0.4846,
"step": 2423
},
{
"epoch": 0.52,
"grad_norm": 0.12666776776313782,
"learning_rate": 4.8848696190689295e-06,
"loss": 0.4853,
"step": 2424
},
{
"epoch": 0.52,
"grad_norm": 0.1458110213279724,
"learning_rate": 4.881381464679698e-06,
"loss": 0.4871,
"step": 2425
},
{
"epoch": 0.52,
"grad_norm": 0.18324485421180725,
"learning_rate": 4.877893368052343e-06,
"loss": 0.545,
"step": 2426
},
{
"epoch": 0.52,
"grad_norm": 0.18099121749401093,
"learning_rate": 4.874405330885413e-06,
"loss": 0.5002,
"step": 2427
},
{
"epoch": 0.52,
"grad_norm": 0.13774168491363525,
"learning_rate": 4.870917354877421e-06,
"loss": 0.4789,
"step": 2428
},
{
"epoch": 0.52,
"grad_norm": 0.16247624158859253,
"learning_rate": 4.867429441726858e-06,
"loss": 0.5491,
"step": 2429
},
{
"epoch": 0.52,
"grad_norm": 0.2958735525608063,
"learning_rate": 4.863941593132179e-06,
"loss": 0.5158,
"step": 2430
},
{
"epoch": 0.52,
"grad_norm": 0.1791061908006668,
"learning_rate": 4.860453810791808e-06,
"loss": 0.5083,
"step": 2431
},
{
"epoch": 0.52,
"grad_norm": 0.15824836492538452,
"learning_rate": 4.856966096404141e-06,
"loss": 0.5177,
"step": 2432
},
{
"epoch": 0.52,
"grad_norm": 0.15134254097938538,
"learning_rate": 4.853478451667532e-06,
"loss": 0.4666,
"step": 2433
},
{
"epoch": 0.52,
"grad_norm": 0.14412038028240204,
"learning_rate": 4.849990878280313e-06,
"loss": 0.5838,
"step": 2434
},
{
"epoch": 0.52,
"grad_norm": 0.1476101279258728,
"learning_rate": 4.84650337794077e-06,
"loss": 0.5155,
"step": 2435
},
{
"epoch": 0.52,
"grad_norm": 0.13752271234989166,
"learning_rate": 4.843015952347159e-06,
"loss": 0.5225,
"step": 2436
},
{
"epoch": 0.52,
"grad_norm": 0.1495019495487213,
"learning_rate": 4.839528603197702e-06,
"loss": 0.5148,
"step": 2437
},
{
"epoch": 0.53,
"grad_norm": 0.18052443861961365,
"learning_rate": 4.8360413321905786e-06,
"loss": 0.5321,
"step": 2438
},
{
"epoch": 0.53,
"grad_norm": 0.16000008583068848,
"learning_rate": 4.832554141023934e-06,
"loss": 0.5374,
"step": 2439
},
{
"epoch": 0.53,
"grad_norm": 0.14435116946697235,
"learning_rate": 4.829067031395871e-06,
"loss": 0.4763,
"step": 2440
},
{
"epoch": 0.53,
"grad_norm": 0.1648446023464203,
"learning_rate": 4.825580005004456e-06,
"loss": 0.5029,
"step": 2441
},
{
"epoch": 0.53,
"grad_norm": 0.1948603093624115,
"learning_rate": 4.822093063547715e-06,
"loss": 0.517,
"step": 2442
},
{
"epoch": 0.53,
"grad_norm": 0.1540631800889969,
"learning_rate": 4.818606208723627e-06,
"loss": 0.5113,
"step": 2443
},
{
"epoch": 0.53,
"grad_norm": 0.14734607934951782,
"learning_rate": 4.815119442230138e-06,
"loss": 0.5323,
"step": 2444
},
{
"epoch": 0.53,
"grad_norm": 0.13016067445278168,
"learning_rate": 4.811632765765143e-06,
"loss": 0.4841,
"step": 2445
},
{
"epoch": 0.53,
"grad_norm": 0.1437351554632187,
"learning_rate": 4.8081461810264955e-06,
"loss": 0.4775,
"step": 2446
},
{
"epoch": 0.53,
"grad_norm": 0.16473154723644257,
"learning_rate": 4.804659689712009e-06,
"loss": 0.5019,
"step": 2447
},
{
"epoch": 0.53,
"grad_norm": 0.13416069746017456,
"learning_rate": 4.801173293519442e-06,
"loss": 0.5193,
"step": 2448
},
{
"epoch": 0.53,
"grad_norm": 0.12704534828662872,
"learning_rate": 4.797686994146519e-06,
"loss": 0.499,
"step": 2449
},
{
"epoch": 0.53,
"grad_norm": 0.15111473202705383,
"learning_rate": 4.7942007932909046e-06,
"loss": 0.5168,
"step": 2450
},
{
"epoch": 0.53,
"grad_norm": 0.13630732893943787,
"learning_rate": 4.790714692650223e-06,
"loss": 0.4938,
"step": 2451
},
{
"epoch": 0.53,
"grad_norm": 0.13137710094451904,
"learning_rate": 4.7872286939220516e-06,
"loss": 0.4544,
"step": 2452
},
{
"epoch": 0.53,
"grad_norm": 0.15518240630626678,
"learning_rate": 4.783742798803909e-06,
"loss": 0.5013,
"step": 2453
},
{
"epoch": 0.53,
"grad_norm": 0.13857389986515045,
"learning_rate": 4.7802570089932746e-06,
"loss": 0.5551,
"step": 2454
},
{
"epoch": 0.53,
"grad_norm": 0.1502048522233963,
"learning_rate": 4.776771326187566e-06,
"loss": 0.4341,
"step": 2455
},
{
"epoch": 0.53,
"grad_norm": 0.16226448118686676,
"learning_rate": 4.773285752084154e-06,
"loss": 0.5555,
"step": 2456
},
{
"epoch": 0.53,
"grad_norm": 0.15207113325595856,
"learning_rate": 4.769800288380361e-06,
"loss": 0.4934,
"step": 2457
},
{
"epoch": 0.53,
"grad_norm": 0.16286228597164154,
"learning_rate": 4.766314936773445e-06,
"loss": 0.5066,
"step": 2458
},
{
"epoch": 0.53,
"grad_norm": 0.15286804735660553,
"learning_rate": 4.762829698960618e-06,
"loss": 0.5425,
"step": 2459
},
{
"epoch": 0.53,
"grad_norm": 0.149344801902771,
"learning_rate": 4.7593445766390315e-06,
"loss": 0.5626,
"step": 2460
},
{
"epoch": 0.53,
"grad_norm": 0.1389455944299698,
"learning_rate": 4.755859571505786e-06,
"loss": 0.4964,
"step": 2461
},
{
"epoch": 0.53,
"grad_norm": 0.14913085103034973,
"learning_rate": 4.752374685257919e-06,
"loss": 0.524,
"step": 2462
},
{
"epoch": 0.53,
"grad_norm": 0.14657723903656006,
"learning_rate": 4.748889919592414e-06,
"loss": 0.5059,
"step": 2463
},
{
"epoch": 0.53,
"grad_norm": 0.12738269567489624,
"learning_rate": 4.745405276206196e-06,
"loss": 0.5039,
"step": 2464
},
{
"epoch": 0.53,
"grad_norm": 0.2088775783777237,
"learning_rate": 4.741920756796126e-06,
"loss": 0.5238,
"step": 2465
},
{
"epoch": 0.53,
"grad_norm": 0.1429111510515213,
"learning_rate": 4.738436363059013e-06,
"loss": 0.4606,
"step": 2466
},
{
"epoch": 0.53,
"grad_norm": 0.1563674658536911,
"learning_rate": 4.734952096691594e-06,
"loss": 0.5982,
"step": 2467
},
{
"epoch": 0.53,
"grad_norm": 0.15420180559158325,
"learning_rate": 4.731467959390552e-06,
"loss": 0.515,
"step": 2468
},
{
"epoch": 0.53,
"grad_norm": 0.21299295127391815,
"learning_rate": 4.727983952852505e-06,
"loss": 0.5306,
"step": 2469
},
{
"epoch": 0.53,
"grad_norm": 0.15745538473129272,
"learning_rate": 4.724500078774008e-06,
"loss": 0.5118,
"step": 2470
},
{
"epoch": 0.53,
"grad_norm": 0.1578780859708786,
"learning_rate": 4.721016338851549e-06,
"loss": 0.5061,
"step": 2471
},
{
"epoch": 0.53,
"grad_norm": 0.1522160917520523,
"learning_rate": 4.717532734781552e-06,
"loss": 0.5417,
"step": 2472
},
{
"epoch": 0.53,
"grad_norm": 0.12511098384857178,
"learning_rate": 4.714049268260376e-06,
"loss": 0.4981,
"step": 2473
},
{
"epoch": 0.53,
"grad_norm": 0.1434258371591568,
"learning_rate": 4.710565940984313e-06,
"loss": 0.5178,
"step": 2474
},
{
"epoch": 0.53,
"grad_norm": 0.13308405876159668,
"learning_rate": 4.707082754649584e-06,
"loss": 0.4986,
"step": 2475
},
{
"epoch": 0.53,
"grad_norm": 0.15585026144981384,
"learning_rate": 4.703599710952347e-06,
"loss": 0.5179,
"step": 2476
},
{
"epoch": 0.53,
"grad_norm": 0.1660911738872528,
"learning_rate": 4.700116811588684e-06,
"loss": 0.4997,
"step": 2477
},
{
"epoch": 0.53,
"grad_norm": 0.1638256311416626,
"learning_rate": 4.6966340582546085e-06,
"loss": 0.4711,
"step": 2478
},
{
"epoch": 0.53,
"grad_norm": 0.14776884019374847,
"learning_rate": 4.693151452646071e-06,
"loss": 0.47,
"step": 2479
},
{
"epoch": 0.53,
"grad_norm": 0.15483321249485016,
"learning_rate": 4.689668996458937e-06,
"loss": 0.5476,
"step": 2480
},
{
"epoch": 0.53,
"grad_norm": 0.18256203830242157,
"learning_rate": 4.6861866913890094e-06,
"loss": 0.5002,
"step": 2481
},
{
"epoch": 0.53,
"grad_norm": 0.1372958868741989,
"learning_rate": 4.682704539132011e-06,
"loss": 0.5201,
"step": 2482
},
{
"epoch": 0.53,
"grad_norm": 0.17966903746128082,
"learning_rate": 4.679222541383594e-06,
"loss": 0.4896,
"step": 2483
},
{
"epoch": 0.54,
"grad_norm": 0.16434355080127716,
"learning_rate": 4.6757406998393354e-06,
"loss": 0.5577,
"step": 2484
},
{
"epoch": 0.54,
"grad_norm": 0.12742279469966888,
"learning_rate": 4.672259016194733e-06,
"loss": 0.5662,
"step": 2485
},
{
"epoch": 0.54,
"grad_norm": 0.14353856444358826,
"learning_rate": 4.668777492145212e-06,
"loss": 0.5476,
"step": 2486
},
{
"epoch": 0.54,
"grad_norm": 0.17390471696853638,
"learning_rate": 4.665296129386116e-06,
"loss": 0.5625,
"step": 2487
},
{
"epoch": 0.54,
"grad_norm": 0.16890183091163635,
"learning_rate": 4.661814929612713e-06,
"loss": 0.5211,
"step": 2488
},
{
"epoch": 0.54,
"grad_norm": 0.16704991459846497,
"learning_rate": 4.658333894520189e-06,
"loss": 0.4941,
"step": 2489
},
{
"epoch": 0.54,
"grad_norm": 0.15086905658245087,
"learning_rate": 4.654853025803649e-06,
"loss": 0.5065,
"step": 2490
},
{
"epoch": 0.54,
"grad_norm": 0.13586142659187317,
"learning_rate": 4.651372325158125e-06,
"loss": 0.5415,
"step": 2491
},
{
"epoch": 0.54,
"grad_norm": 0.17813622951507568,
"learning_rate": 4.6478917942785575e-06,
"loss": 0.5101,
"step": 2492
},
{
"epoch": 0.54,
"grad_norm": 0.16348902881145477,
"learning_rate": 4.644411434859808e-06,
"loss": 0.4916,
"step": 2493
},
{
"epoch": 0.54,
"grad_norm": 0.17885281145572662,
"learning_rate": 4.640931248596655e-06,
"loss": 0.4749,
"step": 2494
},
{
"epoch": 0.54,
"grad_norm": 0.15020768344402313,
"learning_rate": 4.637451237183792e-06,
"loss": 0.5273,
"step": 2495
},
{
"epoch": 0.54,
"grad_norm": 0.15204519033432007,
"learning_rate": 4.633971402315828e-06,
"loss": 0.5244,
"step": 2496
},
{
"epoch": 0.54,
"grad_norm": 0.15182174742221832,
"learning_rate": 4.630491745687286e-06,
"loss": 0.4928,
"step": 2497
},
{
"epoch": 0.54,
"grad_norm": 0.163527712225914,
"learning_rate": 4.627012268992603e-06,
"loss": 0.5102,
"step": 2498
},
{
"epoch": 0.54,
"grad_norm": 0.1811029613018036,
"learning_rate": 4.623532973926124e-06,
"loss": 0.5091,
"step": 2499
},
{
"epoch": 0.54,
"grad_norm": 0.17676551640033722,
"learning_rate": 4.62005386218211e-06,
"loss": 0.5543,
"step": 2500
},
{
"epoch": 0.54,
"grad_norm": 0.14058449864387512,
"learning_rate": 4.616574935454735e-06,
"loss": 0.4906,
"step": 2501
},
{
"epoch": 0.54,
"grad_norm": 0.14341934025287628,
"learning_rate": 4.613096195438074e-06,
"loss": 0.5007,
"step": 2502
},
{
"epoch": 0.54,
"grad_norm": 0.17558392882347107,
"learning_rate": 4.609617643826121e-06,
"loss": 0.4882,
"step": 2503
},
{
"epoch": 0.54,
"grad_norm": 0.13475576043128967,
"learning_rate": 4.60613928231277e-06,
"loss": 0.5144,
"step": 2504
},
{
"epoch": 0.54,
"grad_norm": 0.158226877450943,
"learning_rate": 4.602661112591829e-06,
"loss": 0.5136,
"step": 2505
},
{
"epoch": 0.54,
"grad_norm": 0.1458200067281723,
"learning_rate": 4.59918313635701e-06,
"loss": 0.4688,
"step": 2506
},
{
"epoch": 0.54,
"grad_norm": 0.19686149060726166,
"learning_rate": 4.595705355301927e-06,
"loss": 0.5185,
"step": 2507
},
{
"epoch": 0.54,
"grad_norm": 0.1282099336385727,
"learning_rate": 4.592227771120108e-06,
"loss": 0.5569,
"step": 2508
},
{
"epoch": 0.54,
"grad_norm": 0.18009676039218903,
"learning_rate": 4.588750385504975e-06,
"loss": 0.4903,
"step": 2509
},
{
"epoch": 0.54,
"grad_norm": 0.21005766093730927,
"learning_rate": 4.585273200149859e-06,
"loss": 0.5475,
"step": 2510
},
{
"epoch": 0.54,
"grad_norm": 0.12568634748458862,
"learning_rate": 4.581796216747996e-06,
"loss": 0.5061,
"step": 2511
},
{
"epoch": 0.54,
"grad_norm": 0.14265067875385284,
"learning_rate": 4.578319436992515e-06,
"loss": 0.4862,
"step": 2512
},
{
"epoch": 0.54,
"grad_norm": 0.14382900297641754,
"learning_rate": 4.574842862576455e-06,
"loss": 0.5384,
"step": 2513
},
{
"epoch": 0.54,
"grad_norm": 0.20735333859920502,
"learning_rate": 4.5713664951927475e-06,
"loss": 0.4868,
"step": 2514
},
{
"epoch": 0.54,
"grad_norm": 0.14771424233913422,
"learning_rate": 4.56789033653423e-06,
"loss": 0.4837,
"step": 2515
},
{
"epoch": 0.54,
"grad_norm": 0.1805439293384552,
"learning_rate": 4.5644143882936316e-06,
"loss": 0.5152,
"step": 2516
},
{
"epoch": 0.54,
"grad_norm": 0.16125157475471497,
"learning_rate": 4.560938652163585e-06,
"loss": 0.514,
"step": 2517
},
{
"epoch": 0.54,
"grad_norm": 0.16599629819393158,
"learning_rate": 4.5574631298366165e-06,
"loss": 0.4994,
"step": 2518
},
{
"epoch": 0.54,
"grad_norm": 0.15320484340190887,
"learning_rate": 4.553987823005148e-06,
"loss": 0.4958,
"step": 2519
},
{
"epoch": 0.54,
"grad_norm": 0.1553090214729309,
"learning_rate": 4.550512733361499e-06,
"loss": 0.5354,
"step": 2520
},
{
"epoch": 0.54,
"grad_norm": 0.14517144858837128,
"learning_rate": 4.5470378625978775e-06,
"loss": 0.5354,
"step": 2521
},
{
"epoch": 0.54,
"grad_norm": 0.2318032830953598,
"learning_rate": 4.543563212406392e-06,
"loss": 0.5206,
"step": 2522
},
{
"epoch": 0.54,
"grad_norm": 0.15330053865909576,
"learning_rate": 4.540088784479043e-06,
"loss": 0.4928,
"step": 2523
},
{
"epoch": 0.54,
"grad_norm": 0.16507619619369507,
"learning_rate": 4.536614580507714e-06,
"loss": 0.5271,
"step": 2524
},
{
"epoch": 0.54,
"grad_norm": 0.2185535430908203,
"learning_rate": 4.53314060218419e-06,
"loss": 0.4909,
"step": 2525
},
{
"epoch": 0.54,
"grad_norm": 0.1298246681690216,
"learning_rate": 4.52966685120014e-06,
"loss": 0.4953,
"step": 2526
},
{
"epoch": 0.54,
"grad_norm": 0.16641001403331757,
"learning_rate": 4.526193329247124e-06,
"loss": 0.5287,
"step": 2527
},
{
"epoch": 0.54,
"grad_norm": 0.1725553274154663,
"learning_rate": 4.5227200380165925e-06,
"loss": 0.5028,
"step": 2528
},
{
"epoch": 0.54,
"grad_norm": 0.14473970234394073,
"learning_rate": 4.51924697919988e-06,
"loss": 0.5306,
"step": 2529
},
{
"epoch": 0.54,
"grad_norm": 0.15652526915073395,
"learning_rate": 4.51577415448821e-06,
"loss": 0.5716,
"step": 2530
},
{
"epoch": 0.55,
"grad_norm": 0.127789705991745,
"learning_rate": 4.512301565572691e-06,
"loss": 0.501,
"step": 2531
},
{
"epoch": 0.55,
"grad_norm": 0.1573222577571869,
"learning_rate": 4.508829214144318e-06,
"loss": 0.5025,
"step": 2532
},
{
"epoch": 0.55,
"grad_norm": 0.14756949245929718,
"learning_rate": 4.5053571018939715e-06,
"loss": 0.5278,
"step": 2533
},
{
"epoch": 0.55,
"grad_norm": 0.16021455824375153,
"learning_rate": 4.5018852305124075e-06,
"loss": 0.4744,
"step": 2534
},
{
"epoch": 0.55,
"grad_norm": 0.16380415856838226,
"learning_rate": 4.498413601690278e-06,
"loss": 0.5568,
"step": 2535
},
{
"epoch": 0.55,
"grad_norm": 0.18932676315307617,
"learning_rate": 4.494942217118105e-06,
"loss": 0.4957,
"step": 2536
},
{
"epoch": 0.55,
"grad_norm": 0.18895933032035828,
"learning_rate": 4.491471078486297e-06,
"loss": 0.5109,
"step": 2537
},
{
"epoch": 0.55,
"grad_norm": 0.14104638993740082,
"learning_rate": 4.488000187485144e-06,
"loss": 0.5168,
"step": 2538
},
{
"epoch": 0.55,
"grad_norm": 0.13941968977451324,
"learning_rate": 4.484529545804811e-06,
"loss": 0.6087,
"step": 2539
},
{
"epoch": 0.55,
"grad_norm": 0.1779303252696991,
"learning_rate": 4.481059155135346e-06,
"loss": 0.5274,
"step": 2540
},
{
"epoch": 0.55,
"grad_norm": 0.16781829297542572,
"learning_rate": 4.477589017166671e-06,
"loss": 0.5129,
"step": 2541
},
{
"epoch": 0.55,
"grad_norm": 0.149240642786026,
"learning_rate": 4.474119133588588e-06,
"loss": 0.5388,
"step": 2542
},
{
"epoch": 0.55,
"grad_norm": 0.22637441754341125,
"learning_rate": 4.470649506090772e-06,
"loss": 0.5118,
"step": 2543
},
{
"epoch": 0.55,
"grad_norm": 0.1592012643814087,
"learning_rate": 4.4671801363627776e-06,
"loss": 0.5295,
"step": 2544
},
{
"epoch": 0.55,
"grad_norm": 0.18590115010738373,
"learning_rate": 4.463711026094032e-06,
"loss": 0.4976,
"step": 2545
},
{
"epoch": 0.55,
"grad_norm": 0.2293752282857895,
"learning_rate": 4.460242176973829e-06,
"loss": 0.54,
"step": 2546
},
{
"epoch": 0.55,
"grad_norm": 0.13310395181179047,
"learning_rate": 4.456773590691352e-06,
"loss": 0.5073,
"step": 2547
},
{
"epoch": 0.55,
"grad_norm": 0.1677958369255066,
"learning_rate": 4.453305268935637e-06,
"loss": 0.5132,
"step": 2548
},
{
"epoch": 0.55,
"grad_norm": 0.1587441861629486,
"learning_rate": 4.4498372133956046e-06,
"loss": 0.4854,
"step": 2549
},
{
"epoch": 0.55,
"grad_norm": 0.16079290211200714,
"learning_rate": 4.446369425760042e-06,
"loss": 0.4615,
"step": 2550
},
{
"epoch": 0.55,
"grad_norm": 0.1533065140247345,
"learning_rate": 4.442901907717603e-06,
"loss": 0.487,
"step": 2551
},
{
"epoch": 0.55,
"grad_norm": 0.17068631947040558,
"learning_rate": 4.439434660956814e-06,
"loss": 0.5596,
"step": 2552
},
{
"epoch": 0.55,
"grad_norm": 0.15625819563865662,
"learning_rate": 4.4359676871660665e-06,
"loss": 0.5136,
"step": 2553
},
{
"epoch": 0.55,
"grad_norm": 0.16105841100215912,
"learning_rate": 4.432500988033621e-06,
"loss": 0.5351,
"step": 2554
},
{
"epoch": 0.55,
"grad_norm": 0.16557753086090088,
"learning_rate": 4.429034565247606e-06,
"loss": 0.491,
"step": 2555
},
{
"epoch": 0.55,
"grad_norm": 0.151271790266037,
"learning_rate": 4.42556842049601e-06,
"loss": 0.5274,
"step": 2556
},
{
"epoch": 0.55,
"grad_norm": 0.1744944155216217,
"learning_rate": 4.422102555466691e-06,
"loss": 0.5155,
"step": 2557
},
{
"epoch": 0.55,
"grad_norm": 0.14737199246883392,
"learning_rate": 4.418636971847367e-06,
"loss": 0.5281,
"step": 2558
},
{
"epoch": 0.55,
"grad_norm": 0.16449463367462158,
"learning_rate": 4.415171671325622e-06,
"loss": 0.5183,
"step": 2559
},
{
"epoch": 0.55,
"grad_norm": 0.12860900163650513,
"learning_rate": 4.4117066555889045e-06,
"loss": 0.4566,
"step": 2560
},
{
"epoch": 0.55,
"grad_norm": 0.1798088699579239,
"learning_rate": 4.408241926324515e-06,
"loss": 0.5072,
"step": 2561
},
{
"epoch": 0.55,
"grad_norm": 0.1573198288679123,
"learning_rate": 4.404777485219624e-06,
"loss": 0.5375,
"step": 2562
},
{
"epoch": 0.55,
"grad_norm": 0.12489344924688339,
"learning_rate": 4.401313333961257e-06,
"loss": 0.4767,
"step": 2563
},
{
"epoch": 0.55,
"grad_norm": 0.1495581567287445,
"learning_rate": 4.397849474236299e-06,
"loss": 0.5327,
"step": 2564
},
{
"epoch": 0.55,
"grad_norm": 0.21391817927360535,
"learning_rate": 4.3943859077314956e-06,
"loss": 0.536,
"step": 2565
},
{
"epoch": 0.55,
"grad_norm": 0.1484507918357849,
"learning_rate": 4.390922636133444e-06,
"loss": 0.4943,
"step": 2566
},
{
"epoch": 0.55,
"grad_norm": 0.16837376356124878,
"learning_rate": 4.3874596611286076e-06,
"loss": 0.544,
"step": 2567
},
{
"epoch": 0.55,
"grad_norm": 0.1593373417854309,
"learning_rate": 4.38399698440329e-06,
"loss": 0.5119,
"step": 2568
},
{
"epoch": 0.55,
"grad_norm": 0.18334493041038513,
"learning_rate": 4.380534607643668e-06,
"loss": 0.5283,
"step": 2569
},
{
"epoch": 0.55,
"grad_norm": 0.1494050920009613,
"learning_rate": 4.377072532535756e-06,
"loss": 0.5343,
"step": 2570
},
{
"epoch": 0.55,
"grad_norm": 0.1535796821117401,
"learning_rate": 4.37361076076543e-06,
"loss": 0.5707,
"step": 2571
},
{
"epoch": 0.55,
"grad_norm": 0.22966866195201874,
"learning_rate": 4.370149294018419e-06,
"loss": 0.5478,
"step": 2572
},
{
"epoch": 0.55,
"grad_norm": 0.166322723031044,
"learning_rate": 4.366688133980299e-06,
"loss": 0.5321,
"step": 2573
},
{
"epoch": 0.55,
"grad_norm": 0.1559196412563324,
"learning_rate": 4.3632272823365004e-06,
"loss": 0.4929,
"step": 2574
},
{
"epoch": 0.55,
"grad_norm": 0.13609760999679565,
"learning_rate": 4.359766740772301e-06,
"loss": 0.5255,
"step": 2575
},
{
"epoch": 0.55,
"grad_norm": 0.17465785145759583,
"learning_rate": 4.356306510972829e-06,
"loss": 0.4871,
"step": 2576
},
{
"epoch": 0.56,
"grad_norm": 0.16919800639152527,
"learning_rate": 4.35284659462306e-06,
"loss": 0.5335,
"step": 2577
},
{
"epoch": 0.56,
"grad_norm": 0.20923703908920288,
"learning_rate": 4.349386993407817e-06,
"loss": 0.5549,
"step": 2578
},
{
"epoch": 0.56,
"grad_norm": 0.14986181259155273,
"learning_rate": 4.345927709011771e-06,
"loss": 0.5111,
"step": 2579
},
{
"epoch": 0.56,
"grad_norm": 0.151445209980011,
"learning_rate": 4.342468743119436e-06,
"loss": 0.5129,
"step": 2580
},
{
"epoch": 0.56,
"grad_norm": 0.14397896826267242,
"learning_rate": 4.3390100974151715e-06,
"loss": 0.4842,
"step": 2581
},
{
"epoch": 0.56,
"grad_norm": 0.16390201449394226,
"learning_rate": 4.335551773583186e-06,
"loss": 0.4678,
"step": 2582
},
{
"epoch": 0.56,
"grad_norm": 0.16361331939697266,
"learning_rate": 4.332093773307523e-06,
"loss": 0.5084,
"step": 2583
},
{
"epoch": 0.56,
"grad_norm": 0.17257274687290192,
"learning_rate": 4.328636098272075e-06,
"loss": 0.5223,
"step": 2584
},
{
"epoch": 0.56,
"grad_norm": 0.14300677180290222,
"learning_rate": 4.325178750160573e-06,
"loss": 0.4712,
"step": 2585
},
{
"epoch": 0.56,
"grad_norm": 0.18836241960525513,
"learning_rate": 4.32172173065659e-06,
"loss": 0.5017,
"step": 2586
},
{
"epoch": 0.56,
"grad_norm": 0.1701335459947586,
"learning_rate": 4.318265041443538e-06,
"loss": 0.4977,
"step": 2587
},
{
"epoch": 0.56,
"grad_norm": 0.18600672483444214,
"learning_rate": 4.31480868420467e-06,
"loss": 0.5121,
"step": 2588
},
{
"epoch": 0.56,
"grad_norm": 0.13420304656028748,
"learning_rate": 4.311352660623076e-06,
"loss": 0.4936,
"step": 2589
},
{
"epoch": 0.56,
"grad_norm": 0.19042494893074036,
"learning_rate": 4.307896972381681e-06,
"loss": 0.5553,
"step": 2590
},
{
"epoch": 0.56,
"grad_norm": 0.14162546396255493,
"learning_rate": 4.304441621163252e-06,
"loss": 0.547,
"step": 2591
},
{
"epoch": 0.56,
"grad_norm": 0.17262719571590424,
"learning_rate": 4.3009866086503905e-06,
"loss": 0.5414,
"step": 2592
},
{
"epoch": 0.56,
"grad_norm": 0.1511780321598053,
"learning_rate": 4.297531936525528e-06,
"loss": 0.4973,
"step": 2593
},
{
"epoch": 0.56,
"grad_norm": 0.15445083379745483,
"learning_rate": 4.294077606470937e-06,
"loss": 0.5506,
"step": 2594
},
{
"epoch": 0.56,
"grad_norm": 0.22606535255908966,
"learning_rate": 4.2906236201687186e-06,
"loss": 0.5627,
"step": 2595
},
{
"epoch": 0.56,
"grad_norm": 0.2494857758283615,
"learning_rate": 4.28716997930081e-06,
"loss": 0.5328,
"step": 2596
},
{
"epoch": 0.56,
"grad_norm": 0.1547478884458542,
"learning_rate": 4.283716685548976e-06,
"loss": 0.5037,
"step": 2597
},
{
"epoch": 0.56,
"grad_norm": 0.17305047810077667,
"learning_rate": 4.2802637405948175e-06,
"loss": 0.4701,
"step": 2598
},
{
"epoch": 0.56,
"grad_norm": 0.20879824459552765,
"learning_rate": 4.2768111461197635e-06,
"loss": 0.5721,
"step": 2599
},
{
"epoch": 0.56,
"grad_norm": 0.20338691771030426,
"learning_rate": 4.273358903805069e-06,
"loss": 0.4916,
"step": 2600
},
{
"epoch": 0.56,
"grad_norm": 0.1474212110042572,
"learning_rate": 4.2699070153318244e-06,
"loss": 0.5426,
"step": 2601
},
{
"epoch": 0.56,
"grad_norm": 0.1909620314836502,
"learning_rate": 4.266455482380938e-06,
"loss": 0.4591,
"step": 2602
},
{
"epoch": 0.56,
"grad_norm": 0.16102322936058044,
"learning_rate": 4.2630043066331536e-06,
"loss": 0.4825,
"step": 2603
},
{
"epoch": 0.56,
"grad_norm": 0.174557164311409,
"learning_rate": 4.2595534897690415e-06,
"loss": 0.5141,
"step": 2604
},
{
"epoch": 0.56,
"grad_norm": 0.17708678543567657,
"learning_rate": 4.256103033468989e-06,
"loss": 0.5301,
"step": 2605
},
{
"epoch": 0.56,
"grad_norm": 0.13558730483055115,
"learning_rate": 4.252652939413215e-06,
"loss": 0.4784,
"step": 2606
},
{
"epoch": 0.56,
"grad_norm": 0.188698410987854,
"learning_rate": 4.24920320928176e-06,
"loss": 0.5073,
"step": 2607
},
{
"epoch": 0.56,
"grad_norm": 0.181773841381073,
"learning_rate": 4.245753844754484e-06,
"loss": 0.5205,
"step": 2608
},
{
"epoch": 0.56,
"grad_norm": 0.15207915008068085,
"learning_rate": 4.242304847511076e-06,
"loss": 0.5098,
"step": 2609
},
{
"epoch": 0.56,
"grad_norm": 0.17972496151924133,
"learning_rate": 4.23885621923104e-06,
"loss": 0.5511,
"step": 2610
},
{
"epoch": 0.56,
"grad_norm": 0.14959251880645752,
"learning_rate": 4.235407961593704e-06,
"loss": 0.49,
"step": 2611
},
{
"epoch": 0.56,
"grad_norm": 0.1577451229095459,
"learning_rate": 4.231960076278211e-06,
"loss": 0.4616,
"step": 2612
},
{
"epoch": 0.56,
"grad_norm": 0.1617031991481781,
"learning_rate": 4.228512564963528e-06,
"loss": 0.5371,
"step": 2613
},
{
"epoch": 0.56,
"grad_norm": 0.15706071257591248,
"learning_rate": 4.225065429328439e-06,
"loss": 0.4847,
"step": 2614
},
{
"epoch": 0.56,
"grad_norm": 0.14980901777744293,
"learning_rate": 4.221618671051539e-06,
"loss": 0.5232,
"step": 2615
},
{
"epoch": 0.56,
"grad_norm": 0.15324559807777405,
"learning_rate": 4.218172291811249e-06,
"loss": 0.5333,
"step": 2616
},
{
"epoch": 0.56,
"grad_norm": 0.13998126983642578,
"learning_rate": 4.214726293285797e-06,
"loss": 0.5366,
"step": 2617
},
{
"epoch": 0.56,
"grad_norm": 0.16418395936489105,
"learning_rate": 4.211280677153228e-06,
"loss": 0.5233,
"step": 2618
},
{
"epoch": 0.56,
"grad_norm": 0.16183216869831085,
"learning_rate": 4.207835445091405e-06,
"loss": 0.4953,
"step": 2619
},
{
"epoch": 0.56,
"grad_norm": 0.15545772016048431,
"learning_rate": 4.204390598777999e-06,
"loss": 0.5336,
"step": 2620
},
{
"epoch": 0.56,
"grad_norm": 0.1599649339914322,
"learning_rate": 4.2009461398904955e-06,
"loss": 0.5047,
"step": 2621
},
{
"epoch": 0.56,
"grad_norm": 0.178667351603508,
"learning_rate": 4.1975020701061884e-06,
"loss": 0.5114,
"step": 2622
},
{
"epoch": 0.57,
"grad_norm": 0.16403385996818542,
"learning_rate": 4.194058391102188e-06,
"loss": 0.5077,
"step": 2623
},
{
"epoch": 0.57,
"grad_norm": 0.16363531351089478,
"learning_rate": 4.190615104555407e-06,
"loss": 0.5107,
"step": 2624
},
{
"epoch": 0.57,
"grad_norm": 0.1554226130247116,
"learning_rate": 4.1871722121425725e-06,
"loss": 0.489,
"step": 2625
},
{
"epoch": 0.57,
"grad_norm": 0.14770759642124176,
"learning_rate": 4.1837297155402204e-06,
"loss": 0.5776,
"step": 2626
},
{
"epoch": 0.57,
"grad_norm": 0.17107781767845154,
"learning_rate": 4.180287616424685e-06,
"loss": 0.4841,
"step": 2627
},
{
"epoch": 0.57,
"grad_norm": 0.17729692161083221,
"learning_rate": 4.17684591647212e-06,
"loss": 0.5217,
"step": 2628
},
{
"epoch": 0.57,
"grad_norm": 0.12309854477643967,
"learning_rate": 4.173404617358473e-06,
"loss": 0.5291,
"step": 2629
},
{
"epoch": 0.57,
"grad_norm": 0.1765958070755005,
"learning_rate": 4.1699637207595035e-06,
"loss": 0.5339,
"step": 2630
},
{
"epoch": 0.57,
"grad_norm": 0.13170979917049408,
"learning_rate": 4.166523228350775e-06,
"loss": 0.4824,
"step": 2631
},
{
"epoch": 0.57,
"grad_norm": 0.16714021563529968,
"learning_rate": 4.163083141807648e-06,
"loss": 0.5273,
"step": 2632
},
{
"epoch": 0.57,
"grad_norm": 0.14370590448379517,
"learning_rate": 4.159643462805293e-06,
"loss": 0.5099,
"step": 2633
},
{
"epoch": 0.57,
"grad_norm": 0.16657981276512146,
"learning_rate": 4.156204193018677e-06,
"loss": 0.5525,
"step": 2634
},
{
"epoch": 0.57,
"grad_norm": 0.16202954947948456,
"learning_rate": 4.152765334122569e-06,
"loss": 0.514,
"step": 2635
},
{
"epoch": 0.57,
"grad_norm": 0.16040794551372528,
"learning_rate": 4.149326887791541e-06,
"loss": 0.506,
"step": 2636
},
{
"epoch": 0.57,
"grad_norm": 0.20684373378753662,
"learning_rate": 4.145888855699957e-06,
"loss": 0.4962,
"step": 2637
},
{
"epoch": 0.57,
"grad_norm": 0.1377829760313034,
"learning_rate": 4.142451239521988e-06,
"loss": 0.5331,
"step": 2638
},
{
"epoch": 0.57,
"grad_norm": 0.1686798632144928,
"learning_rate": 4.139014040931594e-06,
"loss": 0.4454,
"step": 2639
},
{
"epoch": 0.57,
"grad_norm": 0.14603053033351898,
"learning_rate": 4.135577261602537e-06,
"loss": 0.4832,
"step": 2640
},
{
"epoch": 0.57,
"grad_norm": 0.14471474289894104,
"learning_rate": 4.132140903208376e-06,
"loss": 0.5147,
"step": 2641
},
{
"epoch": 0.57,
"grad_norm": 0.17779991030693054,
"learning_rate": 4.128704967422458e-06,
"loss": 0.5427,
"step": 2642
},
{
"epoch": 0.57,
"grad_norm": 0.15965478122234344,
"learning_rate": 4.125269455917934e-06,
"loss": 0.5276,
"step": 2643
},
{
"epoch": 0.57,
"grad_norm": 0.13071952760219574,
"learning_rate": 4.1218343703677385e-06,
"loss": 0.5247,
"step": 2644
},
{
"epoch": 0.57,
"grad_norm": 0.1529112011194229,
"learning_rate": 4.118399712444607e-06,
"loss": 0.4814,
"step": 2645
},
{
"epoch": 0.57,
"grad_norm": 0.1327049285173416,
"learning_rate": 4.114965483821061e-06,
"loss": 0.5298,
"step": 2646
},
{
"epoch": 0.57,
"grad_norm": 0.1503492295742035,
"learning_rate": 4.111531686169415e-06,
"loss": 0.4757,
"step": 2647
},
{
"epoch": 0.57,
"grad_norm": 0.15045446157455444,
"learning_rate": 4.108098321161776e-06,
"loss": 0.5147,
"step": 2648
},
{
"epoch": 0.57,
"grad_norm": 0.14457084238529205,
"learning_rate": 4.104665390470034e-06,
"loss": 0.4722,
"step": 2649
},
{
"epoch": 0.57,
"grad_norm": 0.16404461860656738,
"learning_rate": 4.101232895765875e-06,
"loss": 0.5217,
"step": 2650
},
{
"epoch": 0.57,
"grad_norm": 0.15563920140266418,
"learning_rate": 4.0978008387207656e-06,
"loss": 0.4825,
"step": 2651
},
{
"epoch": 0.57,
"grad_norm": 0.1561812460422516,
"learning_rate": 4.094369221005965e-06,
"loss": 0.461,
"step": 2652
},
{
"epoch": 0.57,
"grad_norm": 0.1469080001115799,
"learning_rate": 4.090938044292517e-06,
"loss": 0.5018,
"step": 2653
},
{
"epoch": 0.57,
"grad_norm": 0.14523988962173462,
"learning_rate": 4.0875073102512485e-06,
"loss": 0.5539,
"step": 2654
},
{
"epoch": 0.57,
"grad_norm": 0.13977643847465515,
"learning_rate": 4.084077020552773e-06,
"loss": 0.5137,
"step": 2655
},
{
"epoch": 0.57,
"grad_norm": 0.1671827733516693,
"learning_rate": 4.080647176867486e-06,
"loss": 0.4837,
"step": 2656
},
{
"epoch": 0.57,
"grad_norm": 0.17201554775238037,
"learning_rate": 4.077217780865568e-06,
"loss": 0.5317,
"step": 2657
},
{
"epoch": 0.57,
"grad_norm": 0.16702772676944733,
"learning_rate": 4.07378883421698e-06,
"loss": 0.5107,
"step": 2658
},
{
"epoch": 0.57,
"grad_norm": 0.15763095021247864,
"learning_rate": 4.070360338591463e-06,
"loss": 0.4985,
"step": 2659
},
{
"epoch": 0.57,
"grad_norm": 0.19694013893604279,
"learning_rate": 4.066932295658543e-06,
"loss": 0.5392,
"step": 2660
},
{
"epoch": 0.57,
"grad_norm": 0.16363875567913055,
"learning_rate": 4.0635047070875175e-06,
"loss": 0.5371,
"step": 2661
},
{
"epoch": 0.57,
"grad_norm": 0.14083370566368103,
"learning_rate": 4.06007757454747e-06,
"loss": 0.5127,
"step": 2662
},
{
"epoch": 0.57,
"grad_norm": 0.20537738502025604,
"learning_rate": 4.056650899707262e-06,
"loss": 0.5337,
"step": 2663
},
{
"epoch": 0.57,
"grad_norm": 0.18272843956947327,
"learning_rate": 4.053224684235526e-06,
"loss": 0.4706,
"step": 2664
},
{
"epoch": 0.57,
"grad_norm": 0.1590733379125595,
"learning_rate": 4.049798929800676e-06,
"loss": 0.5598,
"step": 2665
},
{
"epoch": 0.57,
"grad_norm": 0.1680363267660141,
"learning_rate": 4.0463736380708986e-06,
"loss": 0.5321,
"step": 2666
},
{
"epoch": 0.57,
"grad_norm": 0.14845013618469238,
"learning_rate": 4.042948810714158e-06,
"loss": 0.508,
"step": 2667
},
{
"epoch": 0.57,
"grad_norm": 0.18387869000434875,
"learning_rate": 4.039524449398191e-06,
"loss": 0.5134,
"step": 2668
},
{
"epoch": 0.57,
"grad_norm": 0.14832563698291779,
"learning_rate": 4.036100555790505e-06,
"loss": 0.5149,
"step": 2669
},
{
"epoch": 0.58,
"grad_norm": 0.16505834460258484,
"learning_rate": 4.032677131558386e-06,
"loss": 0.5326,
"step": 2670
},
{
"epoch": 0.58,
"grad_norm": 0.16617228090763092,
"learning_rate": 4.0292541783688804e-06,
"loss": 0.5246,
"step": 2671
},
{
"epoch": 0.58,
"grad_norm": 0.14918413758277893,
"learning_rate": 4.025831697888817e-06,
"loss": 0.4876,
"step": 2672
},
{
"epoch": 0.58,
"grad_norm": 0.14089904725551605,
"learning_rate": 4.022409691784791e-06,
"loss": 0.4799,
"step": 2673
},
{
"epoch": 0.58,
"grad_norm": 0.20409740507602692,
"learning_rate": 4.01898816172316e-06,
"loss": 0.4963,
"step": 2674
},
{
"epoch": 0.58,
"grad_norm": 0.180314838886261,
"learning_rate": 4.015567109370059e-06,
"loss": 0.4895,
"step": 2675
},
{
"epoch": 0.58,
"grad_norm": 0.1631566435098648,
"learning_rate": 4.012146536391383e-06,
"loss": 0.4868,
"step": 2676
},
{
"epoch": 0.58,
"grad_norm": 0.1476244032382965,
"learning_rate": 4.008726444452799e-06,
"loss": 0.4909,
"step": 2677
},
{
"epoch": 0.58,
"grad_norm": 0.14692912995815277,
"learning_rate": 4.005306835219737e-06,
"loss": 0.48,
"step": 2678
},
{
"epoch": 0.58,
"grad_norm": 0.15567363798618317,
"learning_rate": 4.001887710357392e-06,
"loss": 0.5127,
"step": 2679
},
{
"epoch": 0.58,
"grad_norm": 0.14364320039749146,
"learning_rate": 3.998469071530725e-06,
"loss": 0.5628,
"step": 2680
},
{
"epoch": 0.58,
"grad_norm": 0.132903054356575,
"learning_rate": 3.995050920404457e-06,
"loss": 0.5542,
"step": 2681
},
{
"epoch": 0.58,
"grad_norm": 0.18202251195907593,
"learning_rate": 3.991633258643077e-06,
"loss": 0.5721,
"step": 2682
},
{
"epoch": 0.58,
"grad_norm": 0.16773462295532227,
"learning_rate": 3.988216087910827e-06,
"loss": 0.5039,
"step": 2683
},
{
"epoch": 0.58,
"grad_norm": 0.14804890751838684,
"learning_rate": 3.9847994098717166e-06,
"loss": 0.5011,
"step": 2684
},
{
"epoch": 0.58,
"grad_norm": 0.14443790912628174,
"learning_rate": 3.981383226189518e-06,
"loss": 0.5187,
"step": 2685
},
{
"epoch": 0.58,
"grad_norm": 0.1854676902294159,
"learning_rate": 3.9779675385277545e-06,
"loss": 0.564,
"step": 2686
},
{
"epoch": 0.58,
"grad_norm": 0.14072245359420776,
"learning_rate": 3.974552348549714e-06,
"loss": 0.4614,
"step": 2687
},
{
"epoch": 0.58,
"grad_norm": 0.1532825082540512,
"learning_rate": 3.971137657918437e-06,
"loss": 0.5517,
"step": 2688
},
{
"epoch": 0.58,
"grad_norm": 0.16841329634189606,
"learning_rate": 3.967723468296727e-06,
"loss": 0.4833,
"step": 2689
},
{
"epoch": 0.58,
"grad_norm": 0.16667723655700684,
"learning_rate": 3.96430978134714e-06,
"loss": 0.4646,
"step": 2690
},
{
"epoch": 0.58,
"grad_norm": 0.15399962663650513,
"learning_rate": 3.960896598731986e-06,
"loss": 0.5664,
"step": 2691
},
{
"epoch": 0.58,
"grad_norm": 0.13954004645347595,
"learning_rate": 3.957483922113334e-06,
"loss": 0.4877,
"step": 2692
},
{
"epoch": 0.58,
"grad_norm": 0.1900685578584671,
"learning_rate": 3.954071753152999e-06,
"loss": 0.5557,
"step": 2693
},
{
"epoch": 0.58,
"grad_norm": 0.16399481892585754,
"learning_rate": 3.950660093512556e-06,
"loss": 0.5266,
"step": 2694
},
{
"epoch": 0.58,
"grad_norm": 0.1279776692390442,
"learning_rate": 3.947248944853332e-06,
"loss": 0.4697,
"step": 2695
},
{
"epoch": 0.58,
"grad_norm": 0.14985980093479156,
"learning_rate": 3.943838308836398e-06,
"loss": 0.5437,
"step": 2696
},
{
"epoch": 0.58,
"grad_norm": 0.2673838138580322,
"learning_rate": 3.940428187122584e-06,
"loss": 0.5087,
"step": 2697
},
{
"epoch": 0.58,
"grad_norm": 0.14051130414009094,
"learning_rate": 3.937018581372462e-06,
"loss": 0.5061,
"step": 2698
},
{
"epoch": 0.58,
"grad_norm": 0.16947080194950104,
"learning_rate": 3.933609493246357e-06,
"loss": 0.5193,
"step": 2699
},
{
"epoch": 0.58,
"grad_norm": 0.18120256066322327,
"learning_rate": 3.9302009244043435e-06,
"loss": 0.576,
"step": 2700
},
{
"epoch": 0.58,
"grad_norm": 0.1373000591993332,
"learning_rate": 3.926792876506238e-06,
"loss": 0.5132,
"step": 2701
},
{
"epoch": 0.58,
"grad_norm": 0.13234984874725342,
"learning_rate": 3.923385351211609e-06,
"loss": 0.5311,
"step": 2702
},
{
"epoch": 0.58,
"grad_norm": 0.1670055389404297,
"learning_rate": 3.919978350179764e-06,
"loss": 0.5461,
"step": 2703
},
{
"epoch": 0.58,
"grad_norm": 0.13284355401992798,
"learning_rate": 3.916571875069764e-06,
"loss": 0.4916,
"step": 2704
},
{
"epoch": 0.58,
"grad_norm": 0.16038647294044495,
"learning_rate": 3.913165927540403e-06,
"loss": 0.5024,
"step": 2705
},
{
"epoch": 0.58,
"grad_norm": 0.15044786036014557,
"learning_rate": 3.909760509250225e-06,
"loss": 0.5306,
"step": 2706
},
{
"epoch": 0.58,
"grad_norm": 0.1481400430202484,
"learning_rate": 3.90635562185752e-06,
"loss": 0.4572,
"step": 2707
},
{
"epoch": 0.58,
"grad_norm": 0.11982067674398422,
"learning_rate": 3.902951267020311e-06,
"loss": 0.4793,
"step": 2708
},
{
"epoch": 0.58,
"grad_norm": 0.1427111029624939,
"learning_rate": 3.899547446396365e-06,
"loss": 0.5488,
"step": 2709
},
{
"epoch": 0.58,
"grad_norm": 0.18075178563594818,
"learning_rate": 3.896144161643189e-06,
"loss": 0.5251,
"step": 2710
},
{
"epoch": 0.58,
"grad_norm": 0.17146192491054535,
"learning_rate": 3.89274141441803e-06,
"loss": 0.5115,
"step": 2711
},
{
"epoch": 0.58,
"grad_norm": 0.19387201964855194,
"learning_rate": 3.8893392063778736e-06,
"loss": 0.5017,
"step": 2712
},
{
"epoch": 0.58,
"grad_norm": 0.12743881344795227,
"learning_rate": 3.88593753917944e-06,
"loss": 0.4378,
"step": 2713
},
{
"epoch": 0.58,
"grad_norm": 0.20177234709262848,
"learning_rate": 3.882536414479189e-06,
"loss": 0.5104,
"step": 2714
},
{
"epoch": 0.58,
"grad_norm": 0.15477432310581207,
"learning_rate": 3.879135833933311e-06,
"loss": 0.4847,
"step": 2715
},
{
"epoch": 0.59,
"grad_norm": 0.18448805809020996,
"learning_rate": 3.8757357991977415e-06,
"loss": 0.4854,
"step": 2716
},
{
"epoch": 0.59,
"grad_norm": 0.1681356281042099,
"learning_rate": 3.8723363119281426e-06,
"loss": 0.5493,
"step": 2717
},
{
"epoch": 0.59,
"grad_norm": 0.1758078634738922,
"learning_rate": 3.868937373779907e-06,
"loss": 0.5012,
"step": 2718
},
{
"epoch": 0.59,
"grad_norm": 0.18355970084667206,
"learning_rate": 3.865538986408169e-06,
"loss": 0.5385,
"step": 2719
},
{
"epoch": 0.59,
"grad_norm": 0.15605668723583221,
"learning_rate": 3.862141151467787e-06,
"loss": 0.547,
"step": 2720
},
{
"epoch": 0.59,
"grad_norm": 0.17370112240314484,
"learning_rate": 3.858743870613355e-06,
"loss": 0.5308,
"step": 2721
},
{
"epoch": 0.59,
"grad_norm": 0.1348809152841568,
"learning_rate": 3.855347145499197e-06,
"loss": 0.5194,
"step": 2722
},
{
"epoch": 0.59,
"grad_norm": 0.17120207846164703,
"learning_rate": 3.851950977779361e-06,
"loss": 0.5159,
"step": 2723
},
{
"epoch": 0.59,
"grad_norm": 0.15506203472614288,
"learning_rate": 3.848555369107631e-06,
"loss": 0.5213,
"step": 2724
},
{
"epoch": 0.59,
"grad_norm": 0.13186971843242645,
"learning_rate": 3.845160321137512e-06,
"loss": 0.4798,
"step": 2725
},
{
"epoch": 0.59,
"grad_norm": 0.15033838152885437,
"learning_rate": 3.841765835522242e-06,
"loss": 0.5573,
"step": 2726
},
{
"epoch": 0.59,
"grad_norm": 0.18248233199119568,
"learning_rate": 3.838371913914783e-06,
"loss": 0.4529,
"step": 2727
},
{
"epoch": 0.59,
"grad_norm": 0.1604524403810501,
"learning_rate": 3.83497855796782e-06,
"loss": 0.506,
"step": 2728
},
{
"epoch": 0.59,
"grad_norm": 0.1608039289712906,
"learning_rate": 3.831585769333766e-06,
"loss": 0.5207,
"step": 2729
},
{
"epoch": 0.59,
"grad_norm": 0.14408713579177856,
"learning_rate": 3.8281935496647526e-06,
"loss": 0.5487,
"step": 2730
},
{
"epoch": 0.59,
"grad_norm": 0.15173058211803436,
"learning_rate": 3.824801900612642e-06,
"loss": 0.5054,
"step": 2731
},
{
"epoch": 0.59,
"grad_norm": 0.20556017756462097,
"learning_rate": 3.821410823829011e-06,
"loss": 0.5244,
"step": 2732
},
{
"epoch": 0.59,
"grad_norm": 0.1307820975780487,
"learning_rate": 3.818020320965162e-06,
"loss": 0.5035,
"step": 2733
},
{
"epoch": 0.59,
"grad_norm": 0.18517783284187317,
"learning_rate": 3.8146303936721197e-06,
"loss": 0.4838,
"step": 2734
},
{
"epoch": 0.59,
"grad_norm": 0.13619892299175262,
"learning_rate": 3.811241043600622e-06,
"loss": 0.5416,
"step": 2735
},
{
"epoch": 0.59,
"grad_norm": 0.19370396435260773,
"learning_rate": 3.8078522724011324e-06,
"loss": 0.5622,
"step": 2736
},
{
"epoch": 0.59,
"grad_norm": 0.1536007523536682,
"learning_rate": 3.8044640817238276e-06,
"loss": 0.5121,
"step": 2737
},
{
"epoch": 0.59,
"grad_norm": 0.15463578701019287,
"learning_rate": 3.8010764732186044e-06,
"loss": 0.5102,
"step": 2738
},
{
"epoch": 0.59,
"grad_norm": 0.17096665501594543,
"learning_rate": 3.797689448535078e-06,
"loss": 0.4799,
"step": 2739
},
{
"epoch": 0.59,
"grad_norm": 0.3133319616317749,
"learning_rate": 3.79430300932257e-06,
"loss": 0.5698,
"step": 2740
},
{
"epoch": 0.59,
"grad_norm": 0.23030497133731842,
"learning_rate": 3.790917157230132e-06,
"loss": 0.5706,
"step": 2741
},
{
"epoch": 0.59,
"grad_norm": 0.15342505276203156,
"learning_rate": 3.7875318939065147e-06,
"loss": 0.4826,
"step": 2742
},
{
"epoch": 0.59,
"grad_norm": 0.2005234658718109,
"learning_rate": 3.784147221000191e-06,
"loss": 0.5415,
"step": 2743
},
{
"epoch": 0.59,
"grad_norm": 0.13762331008911133,
"learning_rate": 3.7807631401593455e-06,
"loss": 0.5106,
"step": 2744
},
{
"epoch": 0.59,
"grad_norm": 0.2076551467180252,
"learning_rate": 3.7773796530318703e-06,
"loss": 0.503,
"step": 2745
},
{
"epoch": 0.59,
"grad_norm": 0.1570519208908081,
"learning_rate": 3.773996761265373e-06,
"loss": 0.5074,
"step": 2746
},
{
"epoch": 0.59,
"grad_norm": 0.1342182457447052,
"learning_rate": 3.7706144665071683e-06,
"loss": 0.4931,
"step": 2747
},
{
"epoch": 0.59,
"grad_norm": 0.17213162779808044,
"learning_rate": 3.767232770404281e-06,
"loss": 0.4552,
"step": 2748
},
{
"epoch": 0.59,
"grad_norm": 0.13107101619243622,
"learning_rate": 3.7638516746034465e-06,
"loss": 0.4909,
"step": 2749
},
{
"epoch": 0.59,
"grad_norm": 0.16508126258850098,
"learning_rate": 3.7604711807511034e-06,
"loss": 0.523,
"step": 2750
},
{
"epoch": 0.59,
"grad_norm": 0.15281084179878235,
"learning_rate": 3.757091290493404e-06,
"loss": 0.5309,
"step": 2751
},
{
"epoch": 0.59,
"grad_norm": 0.20402151346206665,
"learning_rate": 3.753712005476197e-06,
"loss": 0.5493,
"step": 2752
},
{
"epoch": 0.59,
"grad_norm": 0.15612109005451202,
"learning_rate": 3.7503333273450425e-06,
"loss": 0.5259,
"step": 2753
},
{
"epoch": 0.59,
"grad_norm": 0.1936381310224533,
"learning_rate": 3.74695525774521e-06,
"loss": 0.5087,
"step": 2754
},
{
"epoch": 0.59,
"grad_norm": 0.1426432728767395,
"learning_rate": 3.7435777983216614e-06,
"loss": 0.5044,
"step": 2755
},
{
"epoch": 0.59,
"grad_norm": 0.14533087611198425,
"learning_rate": 3.7402009507190696e-06,
"loss": 0.5529,
"step": 2756
},
{
"epoch": 0.59,
"grad_norm": 0.15488633513450623,
"learning_rate": 3.7368247165818056e-06,
"loss": 0.4872,
"step": 2757
},
{
"epoch": 0.59,
"grad_norm": 0.14580923318862915,
"learning_rate": 3.733449097553945e-06,
"loss": 0.551,
"step": 2758
},
{
"epoch": 0.59,
"grad_norm": 0.17380273342132568,
"learning_rate": 3.7300740952792602e-06,
"loss": 0.5494,
"step": 2759
},
{
"epoch": 0.59,
"grad_norm": 0.171724334359169,
"learning_rate": 3.7266997114012265e-06,
"loss": 0.5556,
"step": 2760
},
{
"epoch": 0.59,
"grad_norm": 0.15848620235919952,
"learning_rate": 3.723325947563018e-06,
"loss": 0.5165,
"step": 2761
},
{
"epoch": 0.59,
"grad_norm": 0.15606124699115753,
"learning_rate": 3.7199528054075005e-06,
"loss": 0.5302,
"step": 2762
},
{
"epoch": 0.6,
"grad_norm": 0.16441625356674194,
"learning_rate": 3.7165802865772495e-06,
"loss": 0.5862,
"step": 2763
},
{
"epoch": 0.6,
"grad_norm": 0.13233539462089539,
"learning_rate": 3.713208392714523e-06,
"loss": 0.5144,
"step": 2764
},
{
"epoch": 0.6,
"grad_norm": 0.16361810266971588,
"learning_rate": 3.709837125461283e-06,
"loss": 0.4873,
"step": 2765
},
{
"epoch": 0.6,
"grad_norm": 0.9805002808570862,
"learning_rate": 3.7064664864591878e-06,
"loss": 0.5081,
"step": 2766
},
{
"epoch": 0.6,
"grad_norm": 0.15291385352611542,
"learning_rate": 3.7030964773495823e-06,
"loss": 0.4899,
"step": 2767
},
{
"epoch": 0.6,
"grad_norm": 0.19646501541137695,
"learning_rate": 3.6997270997735122e-06,
"loss": 0.5642,
"step": 2768
},
{
"epoch": 0.6,
"grad_norm": 0.15896441042423248,
"learning_rate": 3.6963583553717104e-06,
"loss": 0.5153,
"step": 2769
},
{
"epoch": 0.6,
"grad_norm": 0.15843161940574646,
"learning_rate": 3.6929902457846034e-06,
"loss": 0.497,
"step": 2770
},
{
"epoch": 0.6,
"grad_norm": 0.19402094185352325,
"learning_rate": 3.6896227726523113e-06,
"loss": 0.5438,
"step": 2771
},
{
"epoch": 0.6,
"grad_norm": 0.1643831878900528,
"learning_rate": 3.6862559376146388e-06,
"loss": 0.5383,
"step": 2772
},
{
"epoch": 0.6,
"grad_norm": 0.15504218637943268,
"learning_rate": 3.6828897423110866e-06,
"loss": 0.505,
"step": 2773
},
{
"epoch": 0.6,
"grad_norm": 0.1874060332775116,
"learning_rate": 3.6795241883808342e-06,
"loss": 0.5366,
"step": 2774
},
{
"epoch": 0.6,
"grad_norm": 0.16982296109199524,
"learning_rate": 3.676159277462757e-06,
"loss": 0.5237,
"step": 2775
},
{
"epoch": 0.6,
"grad_norm": 0.16953998804092407,
"learning_rate": 3.6727950111954186e-06,
"loss": 0.498,
"step": 2776
},
{
"epoch": 0.6,
"grad_norm": 0.1400230973958969,
"learning_rate": 3.66943139121706e-06,
"loss": 0.4611,
"step": 2777
},
{
"epoch": 0.6,
"grad_norm": 0.15184669196605682,
"learning_rate": 3.6660684191656155e-06,
"loss": 0.5214,
"step": 2778
},
{
"epoch": 0.6,
"grad_norm": 0.14015498757362366,
"learning_rate": 3.662706096678699e-06,
"loss": 0.4915,
"step": 2779
},
{
"epoch": 0.6,
"grad_norm": 0.17873437702655792,
"learning_rate": 3.6593444253936094e-06,
"loss": 0.4492,
"step": 2780
},
{
"epoch": 0.6,
"grad_norm": 0.1276986002922058,
"learning_rate": 3.655983406947332e-06,
"loss": 0.4904,
"step": 2781
},
{
"epoch": 0.6,
"grad_norm": 0.1345810890197754,
"learning_rate": 3.652623042976529e-06,
"loss": 0.5068,
"step": 2782
},
{
"epoch": 0.6,
"grad_norm": 0.17123238742351532,
"learning_rate": 3.649263335117548e-06,
"loss": 0.5292,
"step": 2783
},
{
"epoch": 0.6,
"grad_norm": 0.22209994494915009,
"learning_rate": 3.645904285006412e-06,
"loss": 0.5488,
"step": 2784
},
{
"epoch": 0.6,
"grad_norm": 0.29981812834739685,
"learning_rate": 3.6425458942788306e-06,
"loss": 0.4935,
"step": 2785
},
{
"epoch": 0.6,
"grad_norm": 0.17638364434242249,
"learning_rate": 3.6391881645701854e-06,
"loss": 0.5535,
"step": 2786
},
{
"epoch": 0.6,
"grad_norm": 0.1817181259393692,
"learning_rate": 3.63583109751554e-06,
"loss": 0.5224,
"step": 2787
},
{
"epoch": 0.6,
"grad_norm": 0.16286495327949524,
"learning_rate": 3.632474694749638e-06,
"loss": 0.5397,
"step": 2788
},
{
"epoch": 0.6,
"grad_norm": 0.13048282265663147,
"learning_rate": 3.629118957906892e-06,
"loss": 0.5172,
"step": 2789
},
{
"epoch": 0.6,
"grad_norm": 0.1269851177930832,
"learning_rate": 3.625763888621397e-06,
"loss": 0.4823,
"step": 2790
},
{
"epoch": 0.6,
"grad_norm": 0.15424852073192596,
"learning_rate": 3.6224094885269184e-06,
"loss": 0.5374,
"step": 2791
},
{
"epoch": 0.6,
"grad_norm": 0.1900346428155899,
"learning_rate": 3.6190557592569e-06,
"loss": 0.4719,
"step": 2792
},
{
"epoch": 0.6,
"grad_norm": 0.1395425945520401,
"learning_rate": 3.6157027024444558e-06,
"loss": 0.5218,
"step": 2793
},
{
"epoch": 0.6,
"grad_norm": 0.1748196929693222,
"learning_rate": 3.612350319722372e-06,
"loss": 0.5003,
"step": 2794
},
{
"epoch": 0.6,
"grad_norm": 0.15849445760250092,
"learning_rate": 3.6089986127231117e-06,
"loss": 0.5239,
"step": 2795
},
{
"epoch": 0.6,
"grad_norm": 0.16830691695213318,
"learning_rate": 3.6056475830787997e-06,
"loss": 0.5213,
"step": 2796
},
{
"epoch": 0.6,
"grad_norm": 0.13852837681770325,
"learning_rate": 3.6022972324212396e-06,
"loss": 0.4697,
"step": 2797
},
{
"epoch": 0.6,
"grad_norm": 0.12535005807876587,
"learning_rate": 3.5989475623819025e-06,
"loss": 0.5444,
"step": 2798
},
{
"epoch": 0.6,
"grad_norm": 0.1402188241481781,
"learning_rate": 3.595598574591923e-06,
"loss": 0.5238,
"step": 2799
},
{
"epoch": 0.6,
"grad_norm": 0.14916275441646576,
"learning_rate": 3.5922502706821094e-06,
"loss": 0.4976,
"step": 2800
},
{
"epoch": 0.6,
"grad_norm": 0.1618949919939041,
"learning_rate": 3.588902652282934e-06,
"loss": 0.5345,
"step": 2801
},
{
"epoch": 0.6,
"grad_norm": 0.14491844177246094,
"learning_rate": 3.585555721024535e-06,
"loss": 0.515,
"step": 2802
},
{
"epoch": 0.6,
"grad_norm": 0.15220017731189728,
"learning_rate": 3.58220947853672e-06,
"loss": 0.5332,
"step": 2803
},
{
"epoch": 0.6,
"grad_norm": 0.152902290225029,
"learning_rate": 3.578863926448955e-06,
"loss": 0.5592,
"step": 2804
},
{
"epoch": 0.6,
"grad_norm": 0.11480627208948135,
"learning_rate": 3.5755190663903753e-06,
"loss": 0.4952,
"step": 2805
},
{
"epoch": 0.6,
"grad_norm": 0.14540837705135345,
"learning_rate": 3.5721748999897753e-06,
"loss": 0.5294,
"step": 2806
},
{
"epoch": 0.6,
"grad_norm": 0.1490909457206726,
"learning_rate": 3.5688314288756136e-06,
"loss": 0.5052,
"step": 2807
},
{
"epoch": 0.6,
"grad_norm": 0.18195994198322296,
"learning_rate": 3.5654886546760125e-06,
"loss": 0.5326,
"step": 2808
},
{
"epoch": 0.61,
"grad_norm": 0.2022872418165207,
"learning_rate": 3.562146579018747e-06,
"loss": 0.5723,
"step": 2809
},
{
"epoch": 0.61,
"grad_norm": 0.15741689503192902,
"learning_rate": 3.558805203531263e-06,
"loss": 0.5499,
"step": 2810
},
{
"epoch": 0.61,
"grad_norm": 0.1889500916004181,
"learning_rate": 3.5554645298406553e-06,
"loss": 0.5991,
"step": 2811
},
{
"epoch": 0.61,
"grad_norm": 0.1986282765865326,
"learning_rate": 3.5521245595736837e-06,
"loss": 0.4946,
"step": 2812
},
{
"epoch": 0.61,
"grad_norm": 0.17025060951709747,
"learning_rate": 3.5487852943567614e-06,
"loss": 0.567,
"step": 2813
},
{
"epoch": 0.61,
"grad_norm": 0.14447635412216187,
"learning_rate": 3.5454467358159606e-06,
"loss": 0.4781,
"step": 2814
},
{
"epoch": 0.61,
"grad_norm": 0.12846069037914276,
"learning_rate": 3.54210888557701e-06,
"loss": 0.511,
"step": 2815
},
{
"epoch": 0.61,
"grad_norm": 0.180193692445755,
"learning_rate": 3.5387717452652914e-06,
"loss": 0.4993,
"step": 2816
},
{
"epoch": 0.61,
"grad_norm": 0.13410285115242004,
"learning_rate": 3.535435316505843e-06,
"loss": 0.4746,
"step": 2817
},
{
"epoch": 0.61,
"grad_norm": 0.16177906095981598,
"learning_rate": 3.53209960092335e-06,
"loss": 0.5347,
"step": 2818
},
{
"epoch": 0.61,
"grad_norm": 0.15283246338367462,
"learning_rate": 3.5287646001421604e-06,
"loss": 0.5191,
"step": 2819
},
{
"epoch": 0.61,
"grad_norm": 0.15224431455135345,
"learning_rate": 3.5254303157862707e-06,
"loss": 0.5055,
"step": 2820
},
{
"epoch": 0.61,
"grad_norm": 0.18944452702999115,
"learning_rate": 3.5220967494793216e-06,
"loss": 0.463,
"step": 2821
},
{
"epoch": 0.61,
"grad_norm": 0.15556566417217255,
"learning_rate": 3.5187639028446136e-06,
"loss": 0.5134,
"step": 2822
},
{
"epoch": 0.61,
"grad_norm": 0.155210942029953,
"learning_rate": 3.5154317775050906e-06,
"loss": 0.4888,
"step": 2823
},
{
"epoch": 0.61,
"grad_norm": 0.16802415251731873,
"learning_rate": 3.512100375083347e-06,
"loss": 0.5124,
"step": 2824
},
{
"epoch": 0.61,
"grad_norm": 0.23786631226539612,
"learning_rate": 3.508769697201629e-06,
"loss": 0.5722,
"step": 2825
},
{
"epoch": 0.61,
"grad_norm": 0.15338438749313354,
"learning_rate": 3.5054397454818224e-06,
"loss": 0.5459,
"step": 2826
},
{
"epoch": 0.61,
"grad_norm": 0.1475946456193924,
"learning_rate": 3.5021105215454666e-06,
"loss": 0.5012,
"step": 2827
},
{
"epoch": 0.61,
"grad_norm": 0.15379135310649872,
"learning_rate": 3.498782027013742e-06,
"loss": 0.5131,
"step": 2828
},
{
"epoch": 0.61,
"grad_norm": 0.20665378868579865,
"learning_rate": 3.4954542635074744e-06,
"loss": 0.5291,
"step": 2829
},
{
"epoch": 0.61,
"grad_norm": 0.1389567106962204,
"learning_rate": 3.4921272326471388e-06,
"loss": 0.5211,
"step": 2830
},
{
"epoch": 0.61,
"grad_norm": 0.1549108624458313,
"learning_rate": 3.488800936052843e-06,
"loss": 0.4565,
"step": 2831
},
{
"epoch": 0.61,
"grad_norm": 0.16236495971679688,
"learning_rate": 3.4854753753443494e-06,
"loss": 0.4741,
"step": 2832
},
{
"epoch": 0.61,
"grad_norm": 0.13626092672348022,
"learning_rate": 3.4821505521410514e-06,
"loss": 0.4822,
"step": 2833
},
{
"epoch": 0.61,
"grad_norm": 0.13619300723075867,
"learning_rate": 3.47882646806199e-06,
"loss": 0.4672,
"step": 2834
},
{
"epoch": 0.61,
"grad_norm": 0.17099611461162567,
"learning_rate": 3.4755031247258453e-06,
"loss": 0.5018,
"step": 2835
},
{
"epoch": 0.61,
"grad_norm": 0.2704041600227356,
"learning_rate": 3.472180523750933e-06,
"loss": 0.4887,
"step": 2836
},
{
"epoch": 0.61,
"grad_norm": 0.1702050119638443,
"learning_rate": 3.468858666755214e-06,
"loss": 0.4735,
"step": 2837
},
{
"epoch": 0.61,
"grad_norm": 0.13018356263637543,
"learning_rate": 3.4655375553562774e-06,
"loss": 0.5054,
"step": 2838
},
{
"epoch": 0.61,
"grad_norm": 0.15961863100528717,
"learning_rate": 3.4622171911713597e-06,
"loss": 0.4903,
"step": 2839
},
{
"epoch": 0.61,
"grad_norm": 0.20230530202388763,
"learning_rate": 3.458897575817326e-06,
"loss": 0.4923,
"step": 2840
},
{
"epoch": 0.61,
"grad_norm": 0.1560392677783966,
"learning_rate": 3.4555787109106786e-06,
"loss": 0.4996,
"step": 2841
},
{
"epoch": 0.61,
"grad_norm": 0.17162789404392242,
"learning_rate": 3.4522605980675593e-06,
"loss": 0.5324,
"step": 2842
},
{
"epoch": 0.61,
"grad_norm": 0.14241425693035126,
"learning_rate": 3.4489432389037326e-06,
"loss": 0.5093,
"step": 2843
},
{
"epoch": 0.61,
"grad_norm": 0.17781661450862885,
"learning_rate": 3.44562663503461e-06,
"loss": 0.545,
"step": 2844
},
{
"epoch": 0.61,
"grad_norm": 0.26344063878059387,
"learning_rate": 3.4423107880752227e-06,
"loss": 0.5451,
"step": 2845
},
{
"epoch": 0.61,
"grad_norm": 0.1670253723859787,
"learning_rate": 3.43899569964024e-06,
"loss": 0.4649,
"step": 2846
},
{
"epoch": 0.61,
"grad_norm": 0.17507214844226837,
"learning_rate": 3.4356813713439626e-06,
"loss": 0.5291,
"step": 2847
},
{
"epoch": 0.61,
"grad_norm": 0.1973615288734436,
"learning_rate": 3.432367804800316e-06,
"loss": 0.5424,
"step": 2848
},
{
"epoch": 0.61,
"grad_norm": 0.13851170241832733,
"learning_rate": 3.42905500162286e-06,
"loss": 0.4921,
"step": 2849
},
{
"epoch": 0.61,
"grad_norm": 0.15649986267089844,
"learning_rate": 3.4257429634247783e-06,
"loss": 0.5102,
"step": 2850
},
{
"epoch": 0.61,
"grad_norm": 0.1704344004392624,
"learning_rate": 3.4224316918188855e-06,
"loss": 0.5317,
"step": 2851
},
{
"epoch": 0.61,
"grad_norm": 0.19456495344638824,
"learning_rate": 3.419121188417622e-06,
"loss": 0.4987,
"step": 2852
},
{
"epoch": 0.61,
"grad_norm": 0.14243166148662567,
"learning_rate": 3.4158114548330525e-06,
"loss": 0.5126,
"step": 2853
},
{
"epoch": 0.61,
"grad_norm": 0.1448044627904892,
"learning_rate": 3.41250249267687e-06,
"loss": 0.5183,
"step": 2854
},
{
"epoch": 0.62,
"grad_norm": 0.17978918552398682,
"learning_rate": 3.409194303560387e-06,
"loss": 0.5421,
"step": 2855
},
{
"epoch": 0.62,
"grad_norm": 0.14264936745166779,
"learning_rate": 3.4058868890945425e-06,
"loss": 0.4958,
"step": 2856
},
{
"epoch": 0.62,
"grad_norm": 0.15832003951072693,
"learning_rate": 3.4025802508899025e-06,
"loss": 0.4939,
"step": 2857
},
{
"epoch": 0.62,
"grad_norm": 0.1486930102109909,
"learning_rate": 3.3992743905566453e-06,
"loss": 0.5264,
"step": 2858
},
{
"epoch": 0.62,
"grad_norm": 0.19173184037208557,
"learning_rate": 3.39596930970458e-06,
"loss": 0.5165,
"step": 2859
},
{
"epoch": 0.62,
"grad_norm": 0.17818816006183624,
"learning_rate": 3.3926650099431286e-06,
"loss": 0.5617,
"step": 2860
},
{
"epoch": 0.62,
"grad_norm": 0.15651050209999084,
"learning_rate": 3.389361492881337e-06,
"loss": 0.4856,
"step": 2861
},
{
"epoch": 0.62,
"grad_norm": 0.1457422971725464,
"learning_rate": 3.3860587601278715e-06,
"loss": 0.5187,
"step": 2862
},
{
"epoch": 0.62,
"grad_norm": 0.13978311419487,
"learning_rate": 3.3827568132910117e-06,
"loss": 0.493,
"step": 2863
},
{
"epoch": 0.62,
"grad_norm": 0.14989745616912842,
"learning_rate": 3.3794556539786584e-06,
"loss": 0.5355,
"step": 2864
},
{
"epoch": 0.62,
"grad_norm": 0.16385847330093384,
"learning_rate": 3.376155283798323e-06,
"loss": 0.5402,
"step": 2865
},
{
"epoch": 0.62,
"grad_norm": 0.1365756392478943,
"learning_rate": 3.372855704357144e-06,
"loss": 0.5018,
"step": 2866
},
{
"epoch": 0.62,
"grad_norm": 0.14765289425849915,
"learning_rate": 3.3695569172618613e-06,
"loss": 0.5786,
"step": 2867
},
{
"epoch": 0.62,
"grad_norm": 0.14326290786266327,
"learning_rate": 3.3662589241188382e-06,
"loss": 0.4799,
"step": 2868
},
{
"epoch": 0.62,
"grad_norm": 0.1515820473432541,
"learning_rate": 3.3629617265340497e-06,
"loss": 0.4875,
"step": 2869
},
{
"epoch": 0.62,
"grad_norm": 0.14540225267410278,
"learning_rate": 3.3596653261130806e-06,
"loss": 0.5127,
"step": 2870
},
{
"epoch": 0.62,
"grad_norm": 0.162192702293396,
"learning_rate": 3.3563697244611303e-06,
"loss": 0.4825,
"step": 2871
},
{
"epoch": 0.62,
"grad_norm": 0.1744917333126068,
"learning_rate": 3.3530749231830073e-06,
"loss": 0.4677,
"step": 2872
},
{
"epoch": 0.62,
"grad_norm": 0.15274450182914734,
"learning_rate": 3.3497809238831314e-06,
"loss": 0.498,
"step": 2873
},
{
"epoch": 0.62,
"grad_norm": 0.15344925224781036,
"learning_rate": 3.3464877281655335e-06,
"loss": 0.461,
"step": 2874
},
{
"epoch": 0.62,
"grad_norm": 0.14903058111667633,
"learning_rate": 3.3431953376338487e-06,
"loss": 0.5207,
"step": 2875
},
{
"epoch": 0.62,
"grad_norm": 0.15112550556659698,
"learning_rate": 3.339903753891326e-06,
"loss": 0.5271,
"step": 2876
},
{
"epoch": 0.62,
"grad_norm": 0.13480481505393982,
"learning_rate": 3.3366129785408143e-06,
"loss": 0.4761,
"step": 2877
},
{
"epoch": 0.62,
"grad_norm": 0.17278815805912018,
"learning_rate": 3.333323013184773e-06,
"loss": 0.494,
"step": 2878
},
{
"epoch": 0.62,
"grad_norm": 0.16020460426807404,
"learning_rate": 3.3300338594252724e-06,
"loss": 0.5306,
"step": 2879
},
{
"epoch": 0.62,
"grad_norm": 0.19360634684562683,
"learning_rate": 3.326745518863976e-06,
"loss": 0.5292,
"step": 2880
},
{
"epoch": 0.62,
"grad_norm": 0.15092292428016663,
"learning_rate": 3.323457993102161e-06,
"loss": 0.5234,
"step": 2881
},
{
"epoch": 0.62,
"grad_norm": 0.13326002657413483,
"learning_rate": 3.320171283740702e-06,
"loss": 0.4962,
"step": 2882
},
{
"epoch": 0.62,
"grad_norm": 0.13950808346271515,
"learning_rate": 3.316885392380078e-06,
"loss": 0.5058,
"step": 2883
},
{
"epoch": 0.62,
"grad_norm": 0.15002663433551788,
"learning_rate": 3.3136003206203727e-06,
"loss": 0.5212,
"step": 2884
},
{
"epoch": 0.62,
"grad_norm": 0.14820055663585663,
"learning_rate": 3.310316070061266e-06,
"loss": 0.5309,
"step": 2885
},
{
"epoch": 0.62,
"grad_norm": 0.15101811289787292,
"learning_rate": 3.307032642302041e-06,
"loss": 0.5228,
"step": 2886
},
{
"epoch": 0.62,
"grad_norm": 0.15565958619117737,
"learning_rate": 3.3037500389415756e-06,
"loss": 0.4449,
"step": 2887
},
{
"epoch": 0.62,
"grad_norm": 0.12206115573644638,
"learning_rate": 3.3004682615783524e-06,
"loss": 0.469,
"step": 2888
},
{
"epoch": 0.62,
"grad_norm": 0.15403220057487488,
"learning_rate": 3.2971873118104515e-06,
"loss": 0.4853,
"step": 2889
},
{
"epoch": 0.62,
"grad_norm": 0.15070055425167084,
"learning_rate": 3.2939071912355424e-06,
"loss": 0.5003,
"step": 2890
},
{
"epoch": 0.62,
"grad_norm": 0.14524191617965698,
"learning_rate": 3.290627901450899e-06,
"loss": 0.5121,
"step": 2891
},
{
"epoch": 0.62,
"grad_norm": 0.13863269984722137,
"learning_rate": 3.2873494440533856e-06,
"loss": 0.483,
"step": 2892
},
{
"epoch": 0.62,
"grad_norm": 0.162959486246109,
"learning_rate": 3.284071820639465e-06,
"loss": 0.4901,
"step": 2893
},
{
"epoch": 0.62,
"grad_norm": 0.1397026926279068,
"learning_rate": 3.2807950328051906e-06,
"loss": 0.4907,
"step": 2894
},
{
"epoch": 0.62,
"grad_norm": 0.17842566967010498,
"learning_rate": 3.2775190821462105e-06,
"loss": 0.5001,
"step": 2895
},
{
"epoch": 0.62,
"grad_norm": 0.25389254093170166,
"learning_rate": 3.2742439702577665e-06,
"loss": 0.5028,
"step": 2896
},
{
"epoch": 0.62,
"grad_norm": 0.13854780793190002,
"learning_rate": 3.2709696987346885e-06,
"loss": 0.5351,
"step": 2897
},
{
"epoch": 0.62,
"grad_norm": 0.14294210076332092,
"learning_rate": 3.267696269171402e-06,
"loss": 0.4752,
"step": 2898
},
{
"epoch": 0.62,
"grad_norm": 0.12487441301345825,
"learning_rate": 3.264423683161914e-06,
"loss": 0.4884,
"step": 2899
},
{
"epoch": 0.62,
"grad_norm": 0.1544751673936844,
"learning_rate": 3.2611519422998308e-06,
"loss": 0.5406,
"step": 2900
},
{
"epoch": 0.62,
"grad_norm": 0.16319073736667633,
"learning_rate": 3.257881048178344e-06,
"loss": 0.4985,
"step": 2901
},
{
"epoch": 0.63,
"grad_norm": 0.19490410387516022,
"learning_rate": 3.254611002390227e-06,
"loss": 0.5006,
"step": 2902
},
{
"epoch": 0.63,
"grad_norm": 0.14253075420856476,
"learning_rate": 3.251341806527848e-06,
"loss": 0.4988,
"step": 2903
},
{
"epoch": 0.63,
"grad_norm": 0.14755187928676605,
"learning_rate": 3.248073462183155e-06,
"loss": 0.5083,
"step": 2904
},
{
"epoch": 0.63,
"grad_norm": 0.1382237672805786,
"learning_rate": 3.2448059709476864e-06,
"loss": 0.4941,
"step": 2905
},
{
"epoch": 0.63,
"grad_norm": 0.13519005477428436,
"learning_rate": 3.2415393344125647e-06,
"loss": 0.4855,
"step": 2906
},
{
"epoch": 0.63,
"grad_norm": 0.2366933822631836,
"learning_rate": 3.2382735541684905e-06,
"loss": 0.4875,
"step": 2907
},
{
"epoch": 0.63,
"grad_norm": 0.15798290073871613,
"learning_rate": 3.235008631805755e-06,
"loss": 0.5288,
"step": 2908
},
{
"epoch": 0.63,
"grad_norm": 0.16785183548927307,
"learning_rate": 3.231744568914226e-06,
"loss": 0.5308,
"step": 2909
},
{
"epoch": 0.63,
"grad_norm": 0.19100995361804962,
"learning_rate": 3.228481367083356e-06,
"loss": 0.4923,
"step": 2910
},
{
"epoch": 0.63,
"grad_norm": 0.131486177444458,
"learning_rate": 3.2252190279021788e-06,
"loss": 0.4967,
"step": 2911
},
{
"epoch": 0.63,
"grad_norm": 0.15485283732414246,
"learning_rate": 3.2219575529593017e-06,
"loss": 0.465,
"step": 2912
},
{
"epoch": 0.63,
"grad_norm": 0.1736060082912445,
"learning_rate": 3.2186969438429217e-06,
"loss": 0.5094,
"step": 2913
},
{
"epoch": 0.63,
"grad_norm": 0.17122332751750946,
"learning_rate": 3.215437202140803e-06,
"loss": 0.4891,
"step": 2914
},
{
"epoch": 0.63,
"grad_norm": 0.15651971101760864,
"learning_rate": 3.2121783294402966e-06,
"loss": 0.4704,
"step": 2915
},
{
"epoch": 0.63,
"grad_norm": 0.16835874319076538,
"learning_rate": 3.2089203273283253e-06,
"loss": 0.4694,
"step": 2916
},
{
"epoch": 0.63,
"grad_norm": 0.15919756889343262,
"learning_rate": 3.205663197391389e-06,
"loss": 0.5043,
"step": 2917
},
{
"epoch": 0.63,
"grad_norm": 0.17332980036735535,
"learning_rate": 3.2024069412155632e-06,
"loss": 0.5494,
"step": 2918
},
{
"epoch": 0.63,
"grad_norm": 0.15382111072540283,
"learning_rate": 3.199151560386498e-06,
"loss": 0.4838,
"step": 2919
},
{
"epoch": 0.63,
"grad_norm": 0.19345510005950928,
"learning_rate": 3.1958970564894187e-06,
"loss": 0.4929,
"step": 2920
},
{
"epoch": 0.63,
"grad_norm": 0.18597455322742462,
"learning_rate": 3.192643431109117e-06,
"loss": 0.5576,
"step": 2921
},
{
"epoch": 0.63,
"grad_norm": 0.16669237613677979,
"learning_rate": 3.189390685829967e-06,
"loss": 0.4878,
"step": 2922
},
{
"epoch": 0.63,
"grad_norm": 0.13570186495780945,
"learning_rate": 3.186138822235908e-06,
"loss": 0.4852,
"step": 2923
},
{
"epoch": 0.63,
"grad_norm": 0.1756938099861145,
"learning_rate": 3.182887841910448e-06,
"loss": 0.5295,
"step": 2924
},
{
"epoch": 0.63,
"grad_norm": 0.1592927873134613,
"learning_rate": 3.1796377464366713e-06,
"loss": 0.5879,
"step": 2925
},
{
"epoch": 0.63,
"grad_norm": 0.13915982842445374,
"learning_rate": 3.1763885373972246e-06,
"loss": 0.498,
"step": 2926
},
{
"epoch": 0.63,
"grad_norm": 0.18962885439395905,
"learning_rate": 3.1731402163743284e-06,
"loss": 0.4949,
"step": 2927
},
{
"epoch": 0.63,
"grad_norm": 0.17103898525238037,
"learning_rate": 3.1698927849497683e-06,
"loss": 0.5678,
"step": 2928
},
{
"epoch": 0.63,
"grad_norm": 0.19355489313602448,
"learning_rate": 3.166646244704896e-06,
"loss": 0.4849,
"step": 2929
},
{
"epoch": 0.63,
"grad_norm": 0.14212578535079956,
"learning_rate": 3.1634005972206326e-06,
"loss": 0.4616,
"step": 2930
},
{
"epoch": 0.63,
"grad_norm": 0.13874362409114838,
"learning_rate": 3.160155844077459e-06,
"loss": 0.5322,
"step": 2931
},
{
"epoch": 0.63,
"grad_norm": 0.1573115438222885,
"learning_rate": 3.156911986855425e-06,
"loss": 0.555,
"step": 2932
},
{
"epoch": 0.63,
"grad_norm": 0.1475786417722702,
"learning_rate": 3.153669027134144e-06,
"loss": 0.5179,
"step": 2933
},
{
"epoch": 0.63,
"grad_norm": 0.13680386543273926,
"learning_rate": 3.150426966492788e-06,
"loss": 0.521,
"step": 2934
},
{
"epoch": 0.63,
"grad_norm": 0.1602596789598465,
"learning_rate": 3.147185806510099e-06,
"loss": 0.5499,
"step": 2935
},
{
"epoch": 0.63,
"grad_norm": 0.14966510236263275,
"learning_rate": 3.143945548764371e-06,
"loss": 0.4922,
"step": 2936
},
{
"epoch": 0.63,
"grad_norm": 0.14178875088691711,
"learning_rate": 3.140706194833466e-06,
"loss": 0.4547,
"step": 2937
},
{
"epoch": 0.63,
"grad_norm": 0.16615799069404602,
"learning_rate": 3.137467746294803e-06,
"loss": 0.5192,
"step": 2938
},
{
"epoch": 0.63,
"grad_norm": 0.19471901655197144,
"learning_rate": 3.13423020472536e-06,
"loss": 0.5068,
"step": 2939
},
{
"epoch": 0.63,
"grad_norm": 0.1289563924074173,
"learning_rate": 3.130993571701674e-06,
"loss": 0.483,
"step": 2940
},
{
"epoch": 0.63,
"grad_norm": 0.1688213050365448,
"learning_rate": 3.1277578487998387e-06,
"loss": 0.5033,
"step": 2941
},
{
"epoch": 0.63,
"grad_norm": 0.14173230528831482,
"learning_rate": 3.124523037595506e-06,
"loss": 0.4745,
"step": 2942
},
{
"epoch": 0.63,
"grad_norm": 0.1439976543188095,
"learning_rate": 3.1212891396638834e-06,
"loss": 0.4909,
"step": 2943
},
{
"epoch": 0.63,
"grad_norm": 0.13524580001831055,
"learning_rate": 3.1180561565797323e-06,
"loss": 0.5079,
"step": 2944
},
{
"epoch": 0.63,
"grad_norm": 0.1610611528158188,
"learning_rate": 3.114824089917372e-06,
"loss": 0.5046,
"step": 2945
},
{
"epoch": 0.63,
"grad_norm": 0.1482682079076767,
"learning_rate": 3.1115929412506698e-06,
"loss": 0.4762,
"step": 2946
},
{
"epoch": 0.63,
"grad_norm": 0.1553899049758911,
"learning_rate": 3.1083627121530512e-06,
"loss": 0.5337,
"step": 2947
},
{
"epoch": 0.64,
"grad_norm": 0.14075995981693268,
"learning_rate": 3.1051334041974923e-06,
"loss": 0.5239,
"step": 2948
},
{
"epoch": 0.64,
"grad_norm": 0.14739052951335907,
"learning_rate": 3.1019050189565193e-06,
"loss": 0.5304,
"step": 2949
},
{
"epoch": 0.64,
"grad_norm": 0.16444166004657745,
"learning_rate": 3.0986775580022122e-06,
"loss": 0.5106,
"step": 2950
},
{
"epoch": 0.64,
"grad_norm": 0.2006131410598755,
"learning_rate": 3.0954510229061963e-06,
"loss": 0.5723,
"step": 2951
},
{
"epoch": 0.64,
"grad_norm": 0.16884103417396545,
"learning_rate": 3.092225415239652e-06,
"loss": 0.5637,
"step": 2952
},
{
"epoch": 0.64,
"grad_norm": 0.13112773001194,
"learning_rate": 3.089000736573301e-06,
"loss": 0.5007,
"step": 2953
},
{
"epoch": 0.64,
"grad_norm": 0.14087074995040894,
"learning_rate": 3.0857769884774192e-06,
"loss": 0.5106,
"step": 2954
},
{
"epoch": 0.64,
"grad_norm": 0.17167288064956665,
"learning_rate": 3.0825541725218266e-06,
"loss": 0.5006,
"step": 2955
},
{
"epoch": 0.64,
"grad_norm": 0.16773132979869843,
"learning_rate": 3.079332290275887e-06,
"loss": 0.4808,
"step": 2956
},
{
"epoch": 0.64,
"grad_norm": 0.15428221225738525,
"learning_rate": 3.076111343308516e-06,
"loss": 0.531,
"step": 2957
},
{
"epoch": 0.64,
"grad_norm": 0.2029823362827301,
"learning_rate": 3.0728913331881638e-06,
"loss": 0.5106,
"step": 2958
},
{
"epoch": 0.64,
"grad_norm": 0.13769736886024475,
"learning_rate": 3.069672261482832e-06,
"loss": 0.5005,
"step": 2959
},
{
"epoch": 0.64,
"grad_norm": 0.17260031402111053,
"learning_rate": 3.0664541297600682e-06,
"loss": 0.5118,
"step": 2960
},
{
"epoch": 0.64,
"grad_norm": 0.1693435162305832,
"learning_rate": 3.063236939586951e-06,
"loss": 0.5139,
"step": 2961
},
{
"epoch": 0.64,
"grad_norm": 0.12653128802776337,
"learning_rate": 3.0600206925301114e-06,
"loss": 0.5241,
"step": 2962
},
{
"epoch": 0.64,
"grad_norm": 0.1622675359249115,
"learning_rate": 3.0568053901557126e-06,
"loss": 0.5418,
"step": 2963
},
{
"epoch": 0.64,
"grad_norm": 0.12737122178077698,
"learning_rate": 3.053591034029465e-06,
"loss": 0.4476,
"step": 2964
},
{
"epoch": 0.64,
"grad_norm": 0.17606867849826813,
"learning_rate": 3.0503776257166145e-06,
"loss": 0.5201,
"step": 2965
},
{
"epoch": 0.64,
"grad_norm": 0.21557646989822388,
"learning_rate": 3.0471651667819447e-06,
"loss": 0.4985,
"step": 2966
},
{
"epoch": 0.64,
"grad_norm": 0.20406164228916168,
"learning_rate": 3.0439536587897822e-06,
"loss": 0.4886,
"step": 2967
},
{
"epoch": 0.64,
"grad_norm": 0.147229865193367,
"learning_rate": 3.0407431033039795e-06,
"loss": 0.5053,
"step": 2968
},
{
"epoch": 0.64,
"grad_norm": 0.20733335614204407,
"learning_rate": 3.0375335018879383e-06,
"loss": 0.4798,
"step": 2969
},
{
"epoch": 0.64,
"grad_norm": 0.17706511914730072,
"learning_rate": 3.03432485610459e-06,
"loss": 0.4957,
"step": 2970
},
{
"epoch": 0.64,
"grad_norm": 0.18925561010837555,
"learning_rate": 3.031117167516395e-06,
"loss": 0.4832,
"step": 2971
},
{
"epoch": 0.64,
"grad_norm": 0.14262109994888306,
"learning_rate": 3.0279104376853592e-06,
"loss": 0.5004,
"step": 2972
},
{
"epoch": 0.64,
"grad_norm": 0.21173708140850067,
"learning_rate": 3.0247046681730107e-06,
"loss": 0.534,
"step": 2973
},
{
"epoch": 0.64,
"grad_norm": 0.1742897927761078,
"learning_rate": 3.0214998605404165e-06,
"loss": 0.539,
"step": 2974
},
{
"epoch": 0.64,
"grad_norm": 0.13318294286727905,
"learning_rate": 3.0182960163481745e-06,
"loss": 0.4896,
"step": 2975
},
{
"epoch": 0.64,
"grad_norm": 0.14285793900489807,
"learning_rate": 3.0150931371564107e-06,
"loss": 0.5225,
"step": 2976
},
{
"epoch": 0.64,
"grad_norm": 0.14382816851139069,
"learning_rate": 3.0118912245247846e-06,
"loss": 0.5033,
"step": 2977
},
{
"epoch": 0.64,
"grad_norm": 0.1816745102405548,
"learning_rate": 3.0086902800124806e-06,
"loss": 0.5737,
"step": 2978
},
{
"epoch": 0.64,
"grad_norm": 0.1659248024225235,
"learning_rate": 3.005490305178218e-06,
"loss": 0.513,
"step": 2979
},
{
"epoch": 0.64,
"grad_norm": 0.16415072977542877,
"learning_rate": 3.0022913015802363e-06,
"loss": 0.5032,
"step": 2980
},
{
"epoch": 0.64,
"grad_norm": 0.12613564729690552,
"learning_rate": 2.9990932707763067e-06,
"loss": 0.5208,
"step": 2981
},
{
"epoch": 0.64,
"grad_norm": 0.15900714695453644,
"learning_rate": 2.99589621432373e-06,
"loss": 0.517,
"step": 2982
},
{
"epoch": 0.64,
"grad_norm": 0.15835516154766083,
"learning_rate": 2.992700133779324e-06,
"loss": 0.5217,
"step": 2983
},
{
"epoch": 0.64,
"grad_norm": 0.15380804240703583,
"learning_rate": 2.9895050306994385e-06,
"loss": 0.5457,
"step": 2984
},
{
"epoch": 0.64,
"grad_norm": 0.138858824968338,
"learning_rate": 2.986310906639942e-06,
"loss": 0.5249,
"step": 2985
},
{
"epoch": 0.64,
"grad_norm": 0.13095752894878387,
"learning_rate": 2.9831177631562306e-06,
"loss": 0.4808,
"step": 2986
},
{
"epoch": 0.64,
"grad_norm": 0.12830592691898346,
"learning_rate": 2.9799256018032223e-06,
"loss": 0.54,
"step": 2987
},
{
"epoch": 0.64,
"grad_norm": 0.1949312835931778,
"learning_rate": 2.9767344241353535e-06,
"loss": 0.5108,
"step": 2988
},
{
"epoch": 0.64,
"grad_norm": 0.1589624434709549,
"learning_rate": 2.9735442317065864e-06,
"loss": 0.5641,
"step": 2989
},
{
"epoch": 0.64,
"grad_norm": 0.14621149003505707,
"learning_rate": 2.9703550260703974e-06,
"loss": 0.5448,
"step": 2990
},
{
"epoch": 0.64,
"grad_norm": 0.16770517826080322,
"learning_rate": 2.967166808779788e-06,
"loss": 0.5617,
"step": 2991
},
{
"epoch": 0.64,
"grad_norm": 0.1380135864019394,
"learning_rate": 2.9639795813872773e-06,
"loss": 0.5228,
"step": 2992
},
{
"epoch": 0.64,
"grad_norm": 0.13159281015396118,
"learning_rate": 2.9607933454448985e-06,
"loss": 0.5122,
"step": 2993
},
{
"epoch": 0.64,
"grad_norm": 0.15131685137748718,
"learning_rate": 2.9576081025042068e-06,
"loss": 0.481,
"step": 2994
},
{
"epoch": 0.65,
"grad_norm": 0.13696128129959106,
"learning_rate": 2.9544238541162713e-06,
"loss": 0.4559,
"step": 2995
},
{
"epoch": 0.65,
"grad_norm": 0.17516811192035675,
"learning_rate": 2.9512406018316763e-06,
"loss": 0.5363,
"step": 2996
},
{
"epoch": 0.65,
"grad_norm": 0.17963650822639465,
"learning_rate": 2.9480583472005253e-06,
"loss": 0.4986,
"step": 2997
},
{
"epoch": 0.65,
"grad_norm": 0.1492321640253067,
"learning_rate": 2.9448770917724296e-06,
"loss": 0.5725,
"step": 2998
},
{
"epoch": 0.65,
"grad_norm": 0.15479613840579987,
"learning_rate": 2.9416968370965194e-06,
"loss": 0.4926,
"step": 2999
},
{
"epoch": 0.65,
"grad_norm": 0.1259550005197525,
"learning_rate": 2.9385175847214325e-06,
"loss": 0.5108,
"step": 3000
},
{
"epoch": 0.65,
"grad_norm": 0.1810281127691269,
"learning_rate": 2.9353393361953237e-06,
"loss": 0.5176,
"step": 3001
},
{
"epoch": 0.65,
"grad_norm": 0.20367856323719025,
"learning_rate": 2.9321620930658578e-06,
"loss": 0.5562,
"step": 3002
},
{
"epoch": 0.65,
"grad_norm": 0.1935432255268097,
"learning_rate": 2.928985856880205e-06,
"loss": 0.4959,
"step": 3003
},
{
"epoch": 0.65,
"grad_norm": 0.17958539724349976,
"learning_rate": 2.925810629185054e-06,
"loss": 0.5234,
"step": 3004
},
{
"epoch": 0.65,
"grad_norm": 0.15984192490577698,
"learning_rate": 2.922636411526593e-06,
"loss": 0.5221,
"step": 3005
},
{
"epoch": 0.65,
"grad_norm": 0.13086757063865662,
"learning_rate": 2.919463205450526e-06,
"loss": 0.5034,
"step": 3006
},
{
"epoch": 0.65,
"grad_norm": 0.16409295797348022,
"learning_rate": 2.9162910125020575e-06,
"loss": 0.499,
"step": 3007
},
{
"epoch": 0.65,
"grad_norm": 0.1658695936203003,
"learning_rate": 2.9131198342259065e-06,
"loss": 0.5489,
"step": 3008
},
{
"epoch": 0.65,
"grad_norm": 0.2198559045791626,
"learning_rate": 2.9099496721662947e-06,
"loss": 0.5026,
"step": 3009
},
{
"epoch": 0.65,
"grad_norm": 0.1836353838443756,
"learning_rate": 2.9067805278669425e-06,
"loss": 0.5644,
"step": 3010
},
{
"epoch": 0.65,
"grad_norm": 0.20136743783950806,
"learning_rate": 2.9036124028710865e-06,
"loss": 0.5142,
"step": 3011
},
{
"epoch": 0.65,
"grad_norm": 0.2073100060224533,
"learning_rate": 2.900445298721455e-06,
"loss": 0.5486,
"step": 3012
},
{
"epoch": 0.65,
"grad_norm": 0.19056002795696259,
"learning_rate": 2.8972792169602882e-06,
"loss": 0.5525,
"step": 3013
},
{
"epoch": 0.65,
"grad_norm": 0.16226232051849365,
"learning_rate": 2.894114159129324e-06,
"loss": 0.5438,
"step": 3014
},
{
"epoch": 0.65,
"grad_norm": 0.15393410623073578,
"learning_rate": 2.890950126769803e-06,
"loss": 0.519,
"step": 3015
},
{
"epoch": 0.65,
"grad_norm": 0.13310056924819946,
"learning_rate": 2.8877871214224694e-06,
"loss": 0.5414,
"step": 3016
},
{
"epoch": 0.65,
"grad_norm": 0.15130481123924255,
"learning_rate": 2.8846251446275587e-06,
"loss": 0.5139,
"step": 3017
},
{
"epoch": 0.65,
"grad_norm": 0.14056378602981567,
"learning_rate": 2.881464197924814e-06,
"loss": 0.5016,
"step": 3018
},
{
"epoch": 0.65,
"grad_norm": 0.16934460401535034,
"learning_rate": 2.8783042828534756e-06,
"loss": 0.5251,
"step": 3019
},
{
"epoch": 0.65,
"grad_norm": 0.172510027885437,
"learning_rate": 2.875145400952274e-06,
"loss": 0.4938,
"step": 3020
},
{
"epoch": 0.65,
"grad_norm": 0.18168850243091583,
"learning_rate": 2.87198755375945e-06,
"loss": 0.557,
"step": 3021
},
{
"epoch": 0.65,
"grad_norm": 0.18108013272285461,
"learning_rate": 2.868830742812726e-06,
"loss": 0.5058,
"step": 3022
},
{
"epoch": 0.65,
"grad_norm": 0.20254182815551758,
"learning_rate": 2.865674969649329e-06,
"loss": 0.5228,
"step": 3023
},
{
"epoch": 0.65,
"grad_norm": 0.1535319983959198,
"learning_rate": 2.8625202358059806e-06,
"loss": 0.5533,
"step": 3024
},
{
"epoch": 0.65,
"grad_norm": 0.17317281663417816,
"learning_rate": 2.85936654281889e-06,
"loss": 0.5433,
"step": 3025
},
{
"epoch": 0.65,
"grad_norm": 0.12184549868106842,
"learning_rate": 2.8562138922237648e-06,
"loss": 0.5126,
"step": 3026
},
{
"epoch": 0.65,
"grad_norm": 0.15135183930397034,
"learning_rate": 2.8530622855558045e-06,
"loss": 0.4813,
"step": 3027
},
{
"epoch": 0.65,
"grad_norm": 0.23094992339611053,
"learning_rate": 2.8499117243496986e-06,
"loss": 0.4868,
"step": 3028
},
{
"epoch": 0.65,
"grad_norm": 0.13720989227294922,
"learning_rate": 2.846762210139631e-06,
"loss": 0.4968,
"step": 3029
},
{
"epoch": 0.65,
"grad_norm": 0.1362716108560562,
"learning_rate": 2.8436137444592694e-06,
"loss": 0.5245,
"step": 3030
},
{
"epoch": 0.65,
"grad_norm": 0.14415206015110016,
"learning_rate": 2.840466328841778e-06,
"loss": 0.5186,
"step": 3031
},
{
"epoch": 0.65,
"grad_norm": 0.18695032596588135,
"learning_rate": 2.837319964819801e-06,
"loss": 0.5611,
"step": 3032
},
{
"epoch": 0.65,
"grad_norm": 0.1513887345790863,
"learning_rate": 2.8341746539254807e-06,
"loss": 0.5893,
"step": 3033
},
{
"epoch": 0.65,
"grad_norm": 0.17001493275165558,
"learning_rate": 2.8310303976904396e-06,
"loss": 0.4993,
"step": 3034
},
{
"epoch": 0.65,
"grad_norm": 0.19183696806430817,
"learning_rate": 2.827887197645789e-06,
"loss": 0.5087,
"step": 3035
},
{
"epoch": 0.65,
"grad_norm": 0.151499405503273,
"learning_rate": 2.824745055322128e-06,
"loss": 0.557,
"step": 3036
},
{
"epoch": 0.65,
"grad_norm": 0.15552127361297607,
"learning_rate": 2.8216039722495336e-06,
"loss": 0.5215,
"step": 3037
},
{
"epoch": 0.65,
"grad_norm": 0.12379120290279388,
"learning_rate": 2.818463949957575e-06,
"loss": 0.5217,
"step": 3038
},
{
"epoch": 0.65,
"grad_norm": 0.13502056896686554,
"learning_rate": 2.8153249899753e-06,
"loss": 0.5244,
"step": 3039
},
{
"epoch": 0.65,
"grad_norm": 0.15221551060676575,
"learning_rate": 2.8121870938312413e-06,
"loss": 0.5248,
"step": 3040
},
{
"epoch": 0.66,
"grad_norm": 0.16277168691158295,
"learning_rate": 2.809050263053414e-06,
"loss": 0.4598,
"step": 3041
},
{
"epoch": 0.66,
"grad_norm": 0.1595809906721115,
"learning_rate": 2.80591449916931e-06,
"loss": 0.5505,
"step": 3042
},
{
"epoch": 0.66,
"grad_norm": 0.1773127317428589,
"learning_rate": 2.8027798037059094e-06,
"loss": 0.5169,
"step": 3043
},
{
"epoch": 0.66,
"grad_norm": 0.1667371243238449,
"learning_rate": 2.7996461781896624e-06,
"loss": 0.4966,
"step": 3044
},
{
"epoch": 0.66,
"grad_norm": 0.13818593323230743,
"learning_rate": 2.796513624146504e-06,
"loss": 0.5132,
"step": 3045
},
{
"epoch": 0.66,
"grad_norm": 0.13870275020599365,
"learning_rate": 2.7933821431018523e-06,
"loss": 0.528,
"step": 3046
},
{
"epoch": 0.66,
"grad_norm": 0.1374882310628891,
"learning_rate": 2.7902517365805916e-06,
"loss": 0.5159,
"step": 3047
},
{
"epoch": 0.66,
"grad_norm": 0.1938783973455429,
"learning_rate": 2.7871224061070935e-06,
"loss": 0.5242,
"step": 3048
},
{
"epoch": 0.66,
"grad_norm": 0.13137510418891907,
"learning_rate": 2.7839941532051952e-06,
"loss": 0.5338,
"step": 3049
},
{
"epoch": 0.66,
"grad_norm": 0.1456771343946457,
"learning_rate": 2.780866979398218e-06,
"loss": 0.5029,
"step": 3050
},
{
"epoch": 0.66,
"grad_norm": 0.16268415749073029,
"learning_rate": 2.7777408862089537e-06,
"loss": 0.5301,
"step": 3051
},
{
"epoch": 0.66,
"grad_norm": 0.21177208423614502,
"learning_rate": 2.77461587515967e-06,
"loss": 0.5032,
"step": 3052
},
{
"epoch": 0.66,
"grad_norm": 0.19144344329833984,
"learning_rate": 2.771491947772108e-06,
"loss": 0.5062,
"step": 3053
},
{
"epoch": 0.66,
"grad_norm": 0.13552603125572205,
"learning_rate": 2.7683691055674745e-06,
"loss": 0.5184,
"step": 3054
},
{
"epoch": 0.66,
"grad_norm": 0.2080407440662384,
"learning_rate": 2.765247350066455e-06,
"loss": 0.5691,
"step": 3055
},
{
"epoch": 0.66,
"grad_norm": 0.1384773850440979,
"learning_rate": 2.7621266827892062e-06,
"loss": 0.4668,
"step": 3056
},
{
"epoch": 0.66,
"grad_norm": 0.1618855744600296,
"learning_rate": 2.7590071052553487e-06,
"loss": 0.5399,
"step": 3057
},
{
"epoch": 0.66,
"grad_norm": 0.14525936543941498,
"learning_rate": 2.755888618983977e-06,
"loss": 0.5207,
"step": 3058
},
{
"epoch": 0.66,
"grad_norm": 0.15105114877223969,
"learning_rate": 2.7527712254936545e-06,
"loss": 0.5042,
"step": 3059
},
{
"epoch": 0.66,
"grad_norm": 0.1427949219942093,
"learning_rate": 2.749654926302412e-06,
"loss": 0.5236,
"step": 3060
},
{
"epoch": 0.66,
"grad_norm": 0.16231150925159454,
"learning_rate": 2.7465397229277435e-06,
"loss": 0.5481,
"step": 3061
},
{
"epoch": 0.66,
"grad_norm": 0.2165137529373169,
"learning_rate": 2.743425616886615e-06,
"loss": 0.5748,
"step": 3062
},
{
"epoch": 0.66,
"grad_norm": 0.2217060923576355,
"learning_rate": 2.740312609695455e-06,
"loss": 0.537,
"step": 3063
},
{
"epoch": 0.66,
"grad_norm": 0.1639140248298645,
"learning_rate": 2.737200702870157e-06,
"loss": 0.5766,
"step": 3064
},
{
"epoch": 0.66,
"grad_norm": 0.16004133224487305,
"learning_rate": 2.734089897926082e-06,
"loss": 0.5546,
"step": 3065
},
{
"epoch": 0.66,
"grad_norm": 0.1548355221748352,
"learning_rate": 2.7309801963780485e-06,
"loss": 0.5479,
"step": 3066
},
{
"epoch": 0.66,
"grad_norm": 0.13668109476566315,
"learning_rate": 2.727871599740342e-06,
"loss": 0.4974,
"step": 3067
},
{
"epoch": 0.66,
"grad_norm": 0.24507245421409607,
"learning_rate": 2.724764109526711e-06,
"loss": 0.5418,
"step": 3068
},
{
"epoch": 0.66,
"grad_norm": 0.1891452968120575,
"learning_rate": 2.721657727250359e-06,
"loss": 0.4869,
"step": 3069
},
{
"epoch": 0.66,
"grad_norm": 0.16605839133262634,
"learning_rate": 2.7185524544239567e-06,
"loss": 0.5408,
"step": 3070
},
{
"epoch": 0.66,
"grad_norm": 0.1509867161512375,
"learning_rate": 2.7154482925596314e-06,
"loss": 0.4962,
"step": 3071
},
{
"epoch": 0.66,
"grad_norm": 0.13401636481285095,
"learning_rate": 2.71234524316897e-06,
"loss": 0.4739,
"step": 3072
},
{
"epoch": 0.66,
"grad_norm": 0.15112657845020294,
"learning_rate": 2.709243307763019e-06,
"loss": 0.5719,
"step": 3073
},
{
"epoch": 0.66,
"grad_norm": 0.1450798213481903,
"learning_rate": 2.706142487852279e-06,
"loss": 0.5104,
"step": 3074
},
{
"epoch": 0.66,
"grad_norm": 0.17470777034759521,
"learning_rate": 2.7030427849467113e-06,
"loss": 0.5122,
"step": 3075
},
{
"epoch": 0.66,
"grad_norm": 0.173739492893219,
"learning_rate": 2.699944200555727e-06,
"loss": 0.4591,
"step": 3076
},
{
"epoch": 0.66,
"grad_norm": 0.1204950362443924,
"learning_rate": 2.696846736188202e-06,
"loss": 0.536,
"step": 3077
},
{
"epoch": 0.66,
"grad_norm": 0.1670408993959427,
"learning_rate": 2.693750393352462e-06,
"loss": 0.5477,
"step": 3078
},
{
"epoch": 0.66,
"grad_norm": 0.1568535566329956,
"learning_rate": 2.6906551735562824e-06,
"loss": 0.5682,
"step": 3079
},
{
"epoch": 0.66,
"grad_norm": 0.18247413635253906,
"learning_rate": 2.6875610783069007e-06,
"loss": 0.4769,
"step": 3080
},
{
"epoch": 0.66,
"grad_norm": 0.14836347103118896,
"learning_rate": 2.6844681091109958e-06,
"loss": 0.479,
"step": 3081
},
{
"epoch": 0.66,
"grad_norm": 0.15542642772197723,
"learning_rate": 2.681376267474707e-06,
"loss": 0.5113,
"step": 3082
},
{
"epoch": 0.66,
"grad_norm": 0.15311211347579956,
"learning_rate": 2.678285554903623e-06,
"loss": 0.5267,
"step": 3083
},
{
"epoch": 0.66,
"grad_norm": 0.23527516424655914,
"learning_rate": 2.67519597290278e-06,
"loss": 0.5006,
"step": 3084
},
{
"epoch": 0.66,
"grad_norm": 0.13628728687763214,
"learning_rate": 2.6721075229766673e-06,
"loss": 0.5323,
"step": 3085
},
{
"epoch": 0.66,
"grad_norm": 0.16927917301654816,
"learning_rate": 2.669020206629217e-06,
"loss": 0.5134,
"step": 3086
},
{
"epoch": 0.66,
"grad_norm": 0.17380353808403015,
"learning_rate": 2.665934025363817e-06,
"loss": 0.4888,
"step": 3087
},
{
"epoch": 0.67,
"grad_norm": 0.1672961264848709,
"learning_rate": 2.6628489806832947e-06,
"loss": 0.4992,
"step": 3088
},
{
"epoch": 0.67,
"grad_norm": 0.15757709741592407,
"learning_rate": 2.659765074089927e-06,
"loss": 0.5237,
"step": 3089
},
{
"epoch": 0.67,
"grad_norm": 0.18813352286815643,
"learning_rate": 2.6566823070854442e-06,
"loss": 0.5696,
"step": 3090
},
{
"epoch": 0.67,
"grad_norm": 0.17737697064876556,
"learning_rate": 2.653600681171008e-06,
"loss": 0.5657,
"step": 3091
},
{
"epoch": 0.67,
"grad_norm": 0.1634911447763443,
"learning_rate": 2.650520197847235e-06,
"loss": 0.4947,
"step": 3092
},
{
"epoch": 0.67,
"grad_norm": 0.17239625751972198,
"learning_rate": 2.6474408586141794e-06,
"loss": 0.4936,
"step": 3093
},
{
"epoch": 0.67,
"grad_norm": 0.15810348093509674,
"learning_rate": 2.6443626649713407e-06,
"loss": 0.5008,
"step": 3094
},
{
"epoch": 0.67,
"grad_norm": 0.13702960312366486,
"learning_rate": 2.6412856184176615e-06,
"loss": 0.5653,
"step": 3095
},
{
"epoch": 0.67,
"grad_norm": 0.16318099200725555,
"learning_rate": 2.6382097204515246e-06,
"loss": 0.4573,
"step": 3096
},
{
"epoch": 0.67,
"grad_norm": 0.14889857172966003,
"learning_rate": 2.6351349725707543e-06,
"loss": 0.5022,
"step": 3097
},
{
"epoch": 0.67,
"grad_norm": 0.18676966428756714,
"learning_rate": 2.6320613762726123e-06,
"loss": 0.5089,
"step": 3098
},
{
"epoch": 0.67,
"grad_norm": 0.20256297290325165,
"learning_rate": 2.628988933053802e-06,
"loss": 0.4871,
"step": 3099
},
{
"epoch": 0.67,
"grad_norm": 0.18140171468257904,
"learning_rate": 2.625917644410467e-06,
"loss": 0.5102,
"step": 3100
},
{
"epoch": 0.67,
"grad_norm": 0.12691918015480042,
"learning_rate": 2.6228475118381825e-06,
"loss": 0.4831,
"step": 3101
},
{
"epoch": 0.67,
"grad_norm": 0.16980133950710297,
"learning_rate": 2.6197785368319663e-06,
"loss": 0.4974,
"step": 3102
},
{
"epoch": 0.67,
"grad_norm": 0.13890565931797028,
"learning_rate": 2.6167107208862707e-06,
"loss": 0.5288,
"step": 3103
},
{
"epoch": 0.67,
"grad_norm": 0.17979633808135986,
"learning_rate": 2.613644065494985e-06,
"loss": 0.5096,
"step": 3104
},
{
"epoch": 0.67,
"grad_norm": 0.17529235780239105,
"learning_rate": 2.610578572151433e-06,
"loss": 0.5083,
"step": 3105
},
{
"epoch": 0.67,
"grad_norm": 0.13134679198265076,
"learning_rate": 2.6075142423483675e-06,
"loss": 0.5128,
"step": 3106
},
{
"epoch": 0.67,
"grad_norm": 0.12923552095890045,
"learning_rate": 2.6044510775779815e-06,
"loss": 0.4858,
"step": 3107
},
{
"epoch": 0.67,
"grad_norm": 0.17722611129283905,
"learning_rate": 2.6013890793318972e-06,
"loss": 0.5177,
"step": 3108
},
{
"epoch": 0.67,
"grad_norm": 0.17613767087459564,
"learning_rate": 2.5983282491011718e-06,
"loss": 0.5113,
"step": 3109
},
{
"epoch": 0.67,
"grad_norm": 0.15595421195030212,
"learning_rate": 2.5952685883762918e-06,
"loss": 0.4972,
"step": 3110
},
{
"epoch": 0.67,
"grad_norm": 0.23644490540027618,
"learning_rate": 2.59221009864717e-06,
"loss": 0.533,
"step": 3111
},
{
"epoch": 0.67,
"grad_norm": 0.14558325707912445,
"learning_rate": 2.589152781403158e-06,
"loss": 0.4991,
"step": 3112
},
{
"epoch": 0.67,
"grad_norm": 0.15675747394561768,
"learning_rate": 2.5860966381330265e-06,
"loss": 0.4931,
"step": 3113
},
{
"epoch": 0.67,
"grad_norm": 0.14677970111370087,
"learning_rate": 2.583041670324982e-06,
"loss": 0.4964,
"step": 3114
},
{
"epoch": 0.67,
"grad_norm": 0.1544618010520935,
"learning_rate": 2.5799878794666555e-06,
"loss": 0.5627,
"step": 3115
},
{
"epoch": 0.67,
"grad_norm": 0.15437090396881104,
"learning_rate": 2.5769352670451058e-06,
"loss": 0.5382,
"step": 3116
},
{
"epoch": 0.67,
"grad_norm": 0.16935445368289948,
"learning_rate": 2.57388383454682e-06,
"loss": 0.4881,
"step": 3117
},
{
"epoch": 0.67,
"grad_norm": 0.145218625664711,
"learning_rate": 2.5708335834577035e-06,
"loss": 0.492,
"step": 3118
},
{
"epoch": 0.67,
"grad_norm": 0.15230430662631989,
"learning_rate": 2.567784515263093e-06,
"loss": 0.5286,
"step": 3119
},
{
"epoch": 0.67,
"grad_norm": 0.1474408656358719,
"learning_rate": 2.5647366314477473e-06,
"loss": 0.5342,
"step": 3120
},
{
"epoch": 0.67,
"grad_norm": 0.14141744375228882,
"learning_rate": 2.561689933495849e-06,
"loss": 0.4877,
"step": 3121
},
{
"epoch": 0.67,
"grad_norm": 0.14578036963939667,
"learning_rate": 2.5586444228910036e-06,
"loss": 0.5148,
"step": 3122
},
{
"epoch": 0.67,
"grad_norm": 0.15471605956554413,
"learning_rate": 2.5556001011162337e-06,
"loss": 0.5346,
"step": 3123
},
{
"epoch": 0.67,
"grad_norm": 0.15913046896457672,
"learning_rate": 2.5525569696539916e-06,
"loss": 0.5056,
"step": 3124
},
{
"epoch": 0.67,
"grad_norm": 0.17138166725635529,
"learning_rate": 2.54951502998614e-06,
"loss": 0.5096,
"step": 3125
},
{
"epoch": 0.67,
"grad_norm": 0.18976645171642303,
"learning_rate": 2.546474283593969e-06,
"loss": 0.484,
"step": 3126
},
{
"epoch": 0.67,
"grad_norm": 0.14352168142795563,
"learning_rate": 2.5434347319581844e-06,
"loss": 0.4984,
"step": 3127
},
{
"epoch": 0.67,
"grad_norm": 0.16046349704265594,
"learning_rate": 2.540396376558912e-06,
"loss": 0.5463,
"step": 3128
},
{
"epoch": 0.67,
"grad_norm": 0.1726856231689453,
"learning_rate": 2.5373592188756946e-06,
"loss": 0.5361,
"step": 3129
},
{
"epoch": 0.67,
"grad_norm": 0.16727623343467712,
"learning_rate": 2.5343232603874868e-06,
"loss": 0.4938,
"step": 3130
},
{
"epoch": 0.67,
"grad_norm": 0.14829504489898682,
"learning_rate": 2.531288502572667e-06,
"loss": 0.5588,
"step": 3131
},
{
"epoch": 0.67,
"grad_norm": 0.1359606683254242,
"learning_rate": 2.5282549469090246e-06,
"loss": 0.5151,
"step": 3132
},
{
"epoch": 0.67,
"grad_norm": 0.17951302230358124,
"learning_rate": 2.525222594873764e-06,
"loss": 0.552,
"step": 3133
},
{
"epoch": 0.68,
"grad_norm": 0.1504855751991272,
"learning_rate": 2.522191447943506e-06,
"loss": 0.5304,
"step": 3134
},
{
"epoch": 0.68,
"grad_norm": 0.17266714572906494,
"learning_rate": 2.519161507594279e-06,
"loss": 0.513,
"step": 3135
},
{
"epoch": 0.68,
"grad_norm": 0.164722740650177,
"learning_rate": 2.5161327753015297e-06,
"loss": 0.5392,
"step": 3136
},
{
"epoch": 0.68,
"grad_norm": 0.22426824271678925,
"learning_rate": 2.5131052525401145e-06,
"loss": 0.5337,
"step": 3137
},
{
"epoch": 0.68,
"grad_norm": 0.15474985539913177,
"learning_rate": 2.5100789407842985e-06,
"loss": 0.5149,
"step": 3138
},
{
"epoch": 0.68,
"grad_norm": 0.155501589179039,
"learning_rate": 2.5070538415077593e-06,
"loss": 0.5177,
"step": 3139
},
{
"epoch": 0.68,
"grad_norm": 0.15586499869823456,
"learning_rate": 2.5040299561835846e-06,
"loss": 0.4912,
"step": 3140
},
{
"epoch": 0.68,
"grad_norm": 0.15372590720653534,
"learning_rate": 2.5010072862842725e-06,
"loss": 0.4981,
"step": 3141
},
{
"epoch": 0.68,
"grad_norm": 0.1472439020872116,
"learning_rate": 2.4979858332817225e-06,
"loss": 0.5167,
"step": 3142
},
{
"epoch": 0.68,
"grad_norm": 0.3344082534313202,
"learning_rate": 2.494965598647248e-06,
"loss": 0.5456,
"step": 3143
},
{
"epoch": 0.68,
"grad_norm": 0.13474471867084503,
"learning_rate": 2.4919465838515687e-06,
"loss": 0.5113,
"step": 3144
},
{
"epoch": 0.68,
"grad_norm": 0.15115109086036682,
"learning_rate": 2.488928790364804e-06,
"loss": 0.4906,
"step": 3145
},
{
"epoch": 0.68,
"grad_norm": 0.13708892464637756,
"learning_rate": 2.48591221965649e-06,
"loss": 0.484,
"step": 3146
},
{
"epoch": 0.68,
"grad_norm": 0.14575795829296112,
"learning_rate": 2.482896873195555e-06,
"loss": 0.5477,
"step": 3147
},
{
"epoch": 0.68,
"grad_norm": 0.14525777101516724,
"learning_rate": 2.479882752450339e-06,
"loss": 0.5041,
"step": 3148
},
{
"epoch": 0.68,
"grad_norm": 0.12615111470222473,
"learning_rate": 2.4768698588885842e-06,
"loss": 0.4841,
"step": 3149
},
{
"epoch": 0.68,
"grad_norm": 0.1459239274263382,
"learning_rate": 2.4738581939774303e-06,
"loss": 0.5168,
"step": 3150
},
{
"epoch": 0.68,
"grad_norm": 0.16422203183174133,
"learning_rate": 2.4708477591834244e-06,
"loss": 0.5476,
"step": 3151
},
{
"epoch": 0.68,
"grad_norm": 0.12386015802621841,
"learning_rate": 2.4678385559725125e-06,
"loss": 0.4401,
"step": 3152
},
{
"epoch": 0.68,
"grad_norm": 0.16602352261543274,
"learning_rate": 2.4648305858100413e-06,
"loss": 0.5279,
"step": 3153
},
{
"epoch": 0.68,
"grad_norm": 0.195119708776474,
"learning_rate": 2.4618238501607577e-06,
"loss": 0.4794,
"step": 3154
},
{
"epoch": 0.68,
"grad_norm": 0.14807678759098053,
"learning_rate": 2.4588183504888023e-06,
"loss": 0.4964,
"step": 3155
},
{
"epoch": 0.68,
"grad_norm": 0.1313076764345169,
"learning_rate": 2.455814088257723e-06,
"loss": 0.5141,
"step": 3156
},
{
"epoch": 0.68,
"grad_norm": 0.17979438602924347,
"learning_rate": 2.4528110649304555e-06,
"loss": 0.5335,
"step": 3157
},
{
"epoch": 0.68,
"grad_norm": 0.17119114100933075,
"learning_rate": 2.4498092819693364e-06,
"loss": 0.4784,
"step": 3158
},
{
"epoch": 0.68,
"grad_norm": 0.1639591008424759,
"learning_rate": 2.4468087408361053e-06,
"loss": 0.5275,
"step": 3159
},
{
"epoch": 0.68,
"grad_norm": 0.16429801285266876,
"learning_rate": 2.443809442991884e-06,
"loss": 0.4829,
"step": 3160
},
{
"epoch": 0.68,
"grad_norm": 0.1692316085100174,
"learning_rate": 2.440811389897199e-06,
"loss": 0.5242,
"step": 3161
},
{
"epoch": 0.68,
"grad_norm": 0.16549012064933777,
"learning_rate": 2.4378145830119637e-06,
"loss": 0.5217,
"step": 3162
},
{
"epoch": 0.68,
"grad_norm": 0.16364479064941406,
"learning_rate": 2.4348190237954893e-06,
"loss": 0.556,
"step": 3163
},
{
"epoch": 0.68,
"grad_norm": 0.14696238934993744,
"learning_rate": 2.4318247137064788e-06,
"loss": 0.5393,
"step": 3164
},
{
"epoch": 0.68,
"grad_norm": 0.16105543076992035,
"learning_rate": 2.428831654203025e-06,
"loss": 0.5169,
"step": 3165
},
{
"epoch": 0.68,
"grad_norm": 0.14257711172103882,
"learning_rate": 2.425839846742616e-06,
"loss": 0.5376,
"step": 3166
},
{
"epoch": 0.68,
"grad_norm": 0.1712980419397354,
"learning_rate": 2.4228492927821227e-06,
"loss": 0.4776,
"step": 3167
},
{
"epoch": 0.68,
"grad_norm": 0.15367096662521362,
"learning_rate": 2.4198599937778138e-06,
"loss": 0.4887,
"step": 3168
},
{
"epoch": 0.68,
"grad_norm": 0.17562294006347656,
"learning_rate": 2.41687195118534e-06,
"loss": 0.4691,
"step": 3169
},
{
"epoch": 0.68,
"grad_norm": 0.1532362997531891,
"learning_rate": 2.4138851664597424e-06,
"loss": 0.5247,
"step": 3170
},
{
"epoch": 0.68,
"grad_norm": 0.14801405370235443,
"learning_rate": 2.4108996410554565e-06,
"loss": 0.499,
"step": 3171
},
{
"epoch": 0.68,
"grad_norm": 0.24842116236686707,
"learning_rate": 2.407915376426293e-06,
"loss": 0.5365,
"step": 3172
},
{
"epoch": 0.68,
"grad_norm": 0.15684857964515686,
"learning_rate": 2.4049323740254575e-06,
"loss": 0.5435,
"step": 3173
},
{
"epoch": 0.68,
"grad_norm": 0.15908139944076538,
"learning_rate": 2.401950635305535e-06,
"loss": 0.5011,
"step": 3174
},
{
"epoch": 0.68,
"grad_norm": 0.1335798054933548,
"learning_rate": 2.3989701617184986e-06,
"loss": 0.5187,
"step": 3175
},
{
"epoch": 0.68,
"grad_norm": 0.14431187510490417,
"learning_rate": 2.395990954715705e-06,
"loss": 0.5294,
"step": 3176
},
{
"epoch": 0.68,
"grad_norm": 0.22326305508613586,
"learning_rate": 2.3930130157478938e-06,
"loss": 0.5639,
"step": 3177
},
{
"epoch": 0.68,
"grad_norm": 0.11609046161174774,
"learning_rate": 2.390036346265188e-06,
"loss": 0.5045,
"step": 3178
},
{
"epoch": 0.68,
"grad_norm": 0.1769070327281952,
"learning_rate": 2.387060947717089e-06,
"loss": 0.4945,
"step": 3179
},
{
"epoch": 0.69,
"grad_norm": 0.20527726411819458,
"learning_rate": 2.3840868215524824e-06,
"loss": 0.5375,
"step": 3180
},
{
"epoch": 0.69,
"grad_norm": 0.1666276752948761,
"learning_rate": 2.381113969219636e-06,
"loss": 0.5197,
"step": 3181
},
{
"epoch": 0.69,
"grad_norm": 0.18321175873279572,
"learning_rate": 2.378142392166191e-06,
"loss": 0.5291,
"step": 3182
},
{
"epoch": 0.69,
"grad_norm": 0.14883701503276825,
"learning_rate": 2.375172091839174e-06,
"loss": 0.5554,
"step": 3183
},
{
"epoch": 0.69,
"grad_norm": 0.13777758181095123,
"learning_rate": 2.3722030696849857e-06,
"loss": 0.5396,
"step": 3184
},
{
"epoch": 0.69,
"grad_norm": 0.14664918184280396,
"learning_rate": 2.3692353271494073e-06,
"loss": 0.4809,
"step": 3185
},
{
"epoch": 0.69,
"grad_norm": 0.2592916190624237,
"learning_rate": 2.3662688656775973e-06,
"loss": 0.4879,
"step": 3186
},
{
"epoch": 0.69,
"grad_norm": 0.22084404528141022,
"learning_rate": 2.3633036867140843e-06,
"loss": 0.5349,
"step": 3187
},
{
"epoch": 0.69,
"grad_norm": 0.17190620303153992,
"learning_rate": 2.3603397917027787e-06,
"loss": 0.518,
"step": 3188
},
{
"epoch": 0.69,
"grad_norm": 0.16831335425376892,
"learning_rate": 2.3573771820869646e-06,
"loss": 0.4805,
"step": 3189
},
{
"epoch": 0.69,
"grad_norm": 0.1860908567905426,
"learning_rate": 2.3544158593092986e-06,
"loss": 0.5356,
"step": 3190
},
{
"epoch": 0.69,
"grad_norm": 0.1522263139486313,
"learning_rate": 2.3514558248118134e-06,
"loss": 0.532,
"step": 3191
},
{
"epoch": 0.69,
"grad_norm": 0.1509491503238678,
"learning_rate": 2.3484970800359087e-06,
"loss": 0.5388,
"step": 3192
},
{
"epoch": 0.69,
"grad_norm": 0.1586175262928009,
"learning_rate": 2.345539626422363e-06,
"loss": 0.5192,
"step": 3193
},
{
"epoch": 0.69,
"grad_norm": 0.13638122379779816,
"learning_rate": 2.34258346541132e-06,
"loss": 0.4945,
"step": 3194
},
{
"epoch": 0.69,
"grad_norm": 0.1359930783510208,
"learning_rate": 2.339628598442298e-06,
"loss": 0.5176,
"step": 3195
},
{
"epoch": 0.69,
"grad_norm": 0.12893450260162354,
"learning_rate": 2.3366750269541833e-06,
"loss": 0.5562,
"step": 3196
},
{
"epoch": 0.69,
"grad_norm": 0.13871856033802032,
"learning_rate": 2.3337227523852337e-06,
"loss": 0.5027,
"step": 3197
},
{
"epoch": 0.69,
"grad_norm": 0.3347227871417999,
"learning_rate": 2.3307717761730745e-06,
"loss": 0.4677,
"step": 3198
},
{
"epoch": 0.69,
"grad_norm": 0.14517731964588165,
"learning_rate": 2.3278220997546947e-06,
"loss": 0.477,
"step": 3199
},
{
"epoch": 0.69,
"grad_norm": 0.15198582410812378,
"learning_rate": 2.3248737245664575e-06,
"loss": 0.4762,
"step": 3200
},
{
"epoch": 0.69,
"grad_norm": 0.14821645617485046,
"learning_rate": 2.3219266520440833e-06,
"loss": 0.5042,
"step": 3201
},
{
"epoch": 0.69,
"grad_norm": 0.14736728370189667,
"learning_rate": 2.318980883622668e-06,
"loss": 0.542,
"step": 3202
},
{
"epoch": 0.69,
"grad_norm": 0.1347406953573227,
"learning_rate": 2.3160364207366687e-06,
"loss": 0.5291,
"step": 3203
},
{
"epoch": 0.69,
"grad_norm": 0.1243584007024765,
"learning_rate": 2.313093264819903e-06,
"loss": 0.5376,
"step": 3204
},
{
"epoch": 0.69,
"grad_norm": 0.1613331139087677,
"learning_rate": 2.310151417305558e-06,
"loss": 0.517,
"step": 3205
},
{
"epoch": 0.69,
"grad_norm": 0.16766297817230225,
"learning_rate": 2.3072108796261766e-06,
"loss": 0.4946,
"step": 3206
},
{
"epoch": 0.69,
"grad_norm": 0.161861851811409,
"learning_rate": 2.3042716532136718e-06,
"loss": 0.4984,
"step": 3207
},
{
"epoch": 0.69,
"grad_norm": 0.13287605345249176,
"learning_rate": 2.301333739499312e-06,
"loss": 0.4903,
"step": 3208
},
{
"epoch": 0.69,
"grad_norm": 0.1508372724056244,
"learning_rate": 2.2983971399137302e-06,
"loss": 0.5094,
"step": 3209
},
{
"epoch": 0.69,
"grad_norm": 0.11505939811468124,
"learning_rate": 2.2954618558869194e-06,
"loss": 0.4829,
"step": 3210
},
{
"epoch": 0.69,
"grad_norm": 0.19699527323246002,
"learning_rate": 2.2925278888482273e-06,
"loss": 0.508,
"step": 3211
},
{
"epoch": 0.69,
"grad_norm": 0.14467853307724,
"learning_rate": 2.2895952402263642e-06,
"loss": 0.508,
"step": 3212
},
{
"epoch": 0.69,
"grad_norm": 0.13654360175132751,
"learning_rate": 2.286663911449401e-06,
"loss": 0.4768,
"step": 3213
},
{
"epoch": 0.69,
"grad_norm": 0.18661533296108246,
"learning_rate": 2.283733903944756e-06,
"loss": 0.5364,
"step": 3214
},
{
"epoch": 0.69,
"grad_norm": 0.1598883867263794,
"learning_rate": 2.280805219139219e-06,
"loss": 0.5225,
"step": 3215
},
{
"epoch": 0.69,
"grad_norm": 0.174927219748497,
"learning_rate": 2.2778778584589214e-06,
"loss": 0.468,
"step": 3216
},
{
"epoch": 0.69,
"grad_norm": 0.13632676005363464,
"learning_rate": 2.274951823329358e-06,
"loss": 0.5381,
"step": 3217
},
{
"epoch": 0.69,
"grad_norm": 0.1499364972114563,
"learning_rate": 2.272027115175377e-06,
"loss": 0.5137,
"step": 3218
},
{
"epoch": 0.69,
"grad_norm": 0.1480303555727005,
"learning_rate": 2.2691037354211767e-06,
"loss": 0.4381,
"step": 3219
},
{
"epoch": 0.69,
"grad_norm": 0.13622407615184784,
"learning_rate": 2.2661816854903117e-06,
"loss": 0.556,
"step": 3220
},
{
"epoch": 0.69,
"grad_norm": 0.15215899050235748,
"learning_rate": 2.2632609668056906e-06,
"loss": 0.5208,
"step": 3221
},
{
"epoch": 0.69,
"grad_norm": 0.15400218963623047,
"learning_rate": 2.2603415807895718e-06,
"loss": 0.5073,
"step": 3222
},
{
"epoch": 0.69,
"grad_norm": 0.1677476465702057,
"learning_rate": 2.257423528863562e-06,
"loss": 0.4861,
"step": 3223
},
{
"epoch": 0.69,
"grad_norm": 0.1435810774564743,
"learning_rate": 2.254506812448622e-06,
"loss": 0.5095,
"step": 3224
},
{
"epoch": 0.69,
"grad_norm": 0.18489789962768555,
"learning_rate": 2.2515914329650636e-06,
"loss": 0.5538,
"step": 3225
},
{
"epoch": 0.69,
"grad_norm": 0.17401094734668732,
"learning_rate": 2.2486773918325394e-06,
"loss": 0.503,
"step": 3226
},
{
"epoch": 0.7,
"grad_norm": 0.13293685019016266,
"learning_rate": 2.2457646904700632e-06,
"loss": 0.524,
"step": 3227
},
{
"epoch": 0.7,
"grad_norm": 0.1453235000371933,
"learning_rate": 2.242853330295984e-06,
"loss": 0.5121,
"step": 3228
},
{
"epoch": 0.7,
"grad_norm": 0.1937217265367508,
"learning_rate": 2.239943312728004e-06,
"loss": 0.5165,
"step": 3229
},
{
"epoch": 0.7,
"grad_norm": 0.1602119654417038,
"learning_rate": 2.2370346391831737e-06,
"loss": 0.5076,
"step": 3230
},
{
"epoch": 0.7,
"grad_norm": 0.12913572788238525,
"learning_rate": 2.2341273110778817e-06,
"loss": 0.5258,
"step": 3231
},
{
"epoch": 0.7,
"grad_norm": 0.16789868474006653,
"learning_rate": 2.231221329827867e-06,
"loss": 0.5211,
"step": 3232
},
{
"epoch": 0.7,
"grad_norm": 0.2248123735189438,
"learning_rate": 2.228316696848212e-06,
"loss": 0.5348,
"step": 3233
},
{
"epoch": 0.7,
"grad_norm": 0.15316419303417206,
"learning_rate": 2.225413413553341e-06,
"loss": 0.5302,
"step": 3234
},
{
"epoch": 0.7,
"grad_norm": 0.13006356358528137,
"learning_rate": 2.222511481357026e-06,
"loss": 0.5373,
"step": 3235
},
{
"epoch": 0.7,
"grad_norm": 0.14037051796913147,
"learning_rate": 2.219610901672371e-06,
"loss": 0.5641,
"step": 3236
},
{
"epoch": 0.7,
"grad_norm": 0.17289666831493378,
"learning_rate": 2.216711675911833e-06,
"loss": 0.5221,
"step": 3237
},
{
"epoch": 0.7,
"grad_norm": 0.1685284972190857,
"learning_rate": 2.2138138054871993e-06,
"loss": 0.5341,
"step": 3238
},
{
"epoch": 0.7,
"grad_norm": 0.14729037880897522,
"learning_rate": 2.2109172918096034e-06,
"loss": 0.5447,
"step": 3239
},
{
"epoch": 0.7,
"grad_norm": 0.17818932235240936,
"learning_rate": 2.208022136289521e-06,
"loss": 0.5437,
"step": 3240
},
{
"epoch": 0.7,
"grad_norm": 0.14237408339977264,
"learning_rate": 2.205128340336758e-06,
"loss": 0.5103,
"step": 3241
},
{
"epoch": 0.7,
"grad_norm": 0.17500755190849304,
"learning_rate": 2.2022359053604654e-06,
"loss": 0.5444,
"step": 3242
},
{
"epoch": 0.7,
"grad_norm": 0.16034772992134094,
"learning_rate": 2.199344832769125e-06,
"loss": 0.4989,
"step": 3243
},
{
"epoch": 0.7,
"grad_norm": 0.18941017985343933,
"learning_rate": 2.1964551239705604e-06,
"loss": 0.5812,
"step": 3244
},
{
"epoch": 0.7,
"grad_norm": 0.19451190531253815,
"learning_rate": 2.1935667803719307e-06,
"loss": 0.4824,
"step": 3245
},
{
"epoch": 0.7,
"grad_norm": 0.15695609152317047,
"learning_rate": 2.1906798033797276e-06,
"loss": 0.6157,
"step": 3246
},
{
"epoch": 0.7,
"grad_norm": 0.1591734141111374,
"learning_rate": 2.1877941943997817e-06,
"loss": 0.5586,
"step": 3247
},
{
"epoch": 0.7,
"grad_norm": 0.15630222856998444,
"learning_rate": 2.1849099548372492e-06,
"loss": 0.5194,
"step": 3248
},
{
"epoch": 0.7,
"grad_norm": 0.16930875182151794,
"learning_rate": 2.18202708609663e-06,
"loss": 0.4618,
"step": 3249
},
{
"epoch": 0.7,
"grad_norm": 0.12013290822505951,
"learning_rate": 2.179145589581747e-06,
"loss": 0.5097,
"step": 3250
},
{
"epoch": 0.7,
"grad_norm": 0.125696063041687,
"learning_rate": 2.1762654666957606e-06,
"loss": 0.5076,
"step": 3251
},
{
"epoch": 0.7,
"grad_norm": 0.1357826292514801,
"learning_rate": 2.1733867188411606e-06,
"loss": 0.5084,
"step": 3252
},
{
"epoch": 0.7,
"grad_norm": 0.14690843224525452,
"learning_rate": 2.170509347419768e-06,
"loss": 0.5231,
"step": 3253
},
{
"epoch": 0.7,
"grad_norm": 0.14461292326450348,
"learning_rate": 2.167633353832734e-06,
"loss": 0.5063,
"step": 3254
},
{
"epoch": 0.7,
"grad_norm": 0.12568648159503937,
"learning_rate": 2.1647587394805353e-06,
"loss": 0.4546,
"step": 3255
},
{
"epoch": 0.7,
"grad_norm": 0.15997205674648285,
"learning_rate": 2.1618855057629804e-06,
"loss": 0.5882,
"step": 3256
},
{
"epoch": 0.7,
"grad_norm": 0.18054303526878357,
"learning_rate": 2.159013654079205e-06,
"loss": 0.4966,
"step": 3257
},
{
"epoch": 0.7,
"grad_norm": 0.15831291675567627,
"learning_rate": 2.156143185827671e-06,
"loss": 0.5021,
"step": 3258
},
{
"epoch": 0.7,
"grad_norm": 0.2396220564842224,
"learning_rate": 2.153274102406169e-06,
"loss": 0.5147,
"step": 3259
},
{
"epoch": 0.7,
"grad_norm": 0.13623584806919098,
"learning_rate": 2.1504064052118095e-06,
"loss": 0.4903,
"step": 3260
},
{
"epoch": 0.7,
"grad_norm": 0.18768242001533508,
"learning_rate": 2.1475400956410337e-06,
"loss": 0.5416,
"step": 3261
},
{
"epoch": 0.7,
"grad_norm": 0.15971659123897552,
"learning_rate": 2.144675175089606e-06,
"loss": 0.5072,
"step": 3262
},
{
"epoch": 0.7,
"grad_norm": 0.20064754784107208,
"learning_rate": 2.1418116449526117e-06,
"loss": 0.4895,
"step": 3263
},
{
"epoch": 0.7,
"grad_norm": 0.1600414365530014,
"learning_rate": 2.1389495066244613e-06,
"loss": 0.4434,
"step": 3264
},
{
"epoch": 0.7,
"grad_norm": 0.17641644179821014,
"learning_rate": 2.136088761498888e-06,
"loss": 0.5195,
"step": 3265
},
{
"epoch": 0.7,
"grad_norm": 0.16606129705905914,
"learning_rate": 2.1332294109689446e-06,
"loss": 0.4901,
"step": 3266
},
{
"epoch": 0.7,
"grad_norm": 0.16686317324638367,
"learning_rate": 2.1303714564270086e-06,
"loss": 0.5201,
"step": 3267
},
{
"epoch": 0.7,
"grad_norm": 0.14948517084121704,
"learning_rate": 2.127514899264771e-06,
"loss": 0.5022,
"step": 3268
},
{
"epoch": 0.7,
"grad_norm": 0.21814821660518646,
"learning_rate": 2.1246597408732493e-06,
"loss": 0.5412,
"step": 3269
},
{
"epoch": 0.7,
"grad_norm": 0.16125313937664032,
"learning_rate": 2.1218059826427727e-06,
"loss": 0.4777,
"step": 3270
},
{
"epoch": 0.7,
"grad_norm": 0.15708723664283752,
"learning_rate": 2.118953625962998e-06,
"loss": 0.4976,
"step": 3271
},
{
"epoch": 0.7,
"grad_norm": 0.14696088433265686,
"learning_rate": 2.1161026722228932e-06,
"loss": 0.4869,
"step": 3272
},
{
"epoch": 0.71,
"grad_norm": 0.173833429813385,
"learning_rate": 2.1132531228107416e-06,
"loss": 0.5291,
"step": 3273
},
{
"epoch": 0.71,
"grad_norm": 0.17601168155670166,
"learning_rate": 2.110404979114149e-06,
"loss": 0.53,
"step": 3274
},
{
"epoch": 0.71,
"grad_norm": 0.1788867861032486,
"learning_rate": 2.1075582425200286e-06,
"loss": 0.5061,
"step": 3275
},
{
"epoch": 0.71,
"grad_norm": 0.15546058118343353,
"learning_rate": 2.104712914414615e-06,
"loss": 0.5317,
"step": 3276
},
{
"epoch": 0.71,
"grad_norm": 0.1524634212255478,
"learning_rate": 2.101868996183454e-06,
"loss": 0.4956,
"step": 3277
},
{
"epoch": 0.71,
"grad_norm": 0.123813197016716,
"learning_rate": 2.0990264892114067e-06,
"loss": 0.5138,
"step": 3278
},
{
"epoch": 0.71,
"grad_norm": 0.16139627993106842,
"learning_rate": 2.0961853948826466e-06,
"loss": 0.5739,
"step": 3279
},
{
"epoch": 0.71,
"grad_norm": 0.17389854788780212,
"learning_rate": 2.093345714580656e-06,
"loss": 0.5025,
"step": 3280
},
{
"epoch": 0.71,
"grad_norm": 0.12925244867801666,
"learning_rate": 2.0905074496882333e-06,
"loss": 0.495,
"step": 3281
},
{
"epoch": 0.71,
"grad_norm": 0.16095152497291565,
"learning_rate": 2.0876706015874816e-06,
"loss": 0.5108,
"step": 3282
},
{
"epoch": 0.71,
"grad_norm": 0.1847510039806366,
"learning_rate": 2.0848351716598227e-06,
"loss": 0.5201,
"step": 3283
},
{
"epoch": 0.71,
"grad_norm": 0.20535904169082642,
"learning_rate": 2.0820011612859825e-06,
"loss": 0.4862,
"step": 3284
},
{
"epoch": 0.71,
"grad_norm": 0.14182107150554657,
"learning_rate": 2.0791685718459936e-06,
"loss": 0.5464,
"step": 3285
},
{
"epoch": 0.71,
"grad_norm": 0.12854298949241638,
"learning_rate": 2.076337404719203e-06,
"loss": 0.47,
"step": 3286
},
{
"epoch": 0.71,
"grad_norm": 0.1679810881614685,
"learning_rate": 2.073507661284257e-06,
"loss": 0.5748,
"step": 3287
},
{
"epoch": 0.71,
"grad_norm": 0.13592375814914703,
"learning_rate": 2.0706793429191156e-06,
"loss": 0.5212,
"step": 3288
},
{
"epoch": 0.71,
"grad_norm": 0.18078750371932983,
"learning_rate": 2.0678524510010416e-06,
"loss": 0.5825,
"step": 3289
},
{
"epoch": 0.71,
"grad_norm": 0.15121810138225555,
"learning_rate": 2.0650269869066048e-06,
"loss": 0.5514,
"step": 3290
},
{
"epoch": 0.71,
"grad_norm": 0.1346854269504547,
"learning_rate": 2.0622029520116798e-06,
"loss": 0.5185,
"step": 3291
},
{
"epoch": 0.71,
"grad_norm": 0.17876240611076355,
"learning_rate": 2.0593803476914407e-06,
"loss": 0.5203,
"step": 3292
},
{
"epoch": 0.71,
"grad_norm": 0.13062328100204468,
"learning_rate": 2.0565591753203713e-06,
"loss": 0.4912,
"step": 3293
},
{
"epoch": 0.71,
"grad_norm": 0.14382170140743256,
"learning_rate": 2.053739436272256e-06,
"loss": 0.5206,
"step": 3294
},
{
"epoch": 0.71,
"grad_norm": 0.1761350929737091,
"learning_rate": 2.0509211319201753e-06,
"loss": 0.5529,
"step": 3295
},
{
"epoch": 0.71,
"grad_norm": 0.14372633397579193,
"learning_rate": 2.0481042636365243e-06,
"loss": 0.5006,
"step": 3296
},
{
"epoch": 0.71,
"grad_norm": 0.17832966148853302,
"learning_rate": 2.045288832792985e-06,
"loss": 0.5626,
"step": 3297
},
{
"epoch": 0.71,
"grad_norm": 0.1652042418718338,
"learning_rate": 2.0424748407605468e-06,
"loss": 0.4809,
"step": 3298
},
{
"epoch": 0.71,
"grad_norm": 0.16764576733112335,
"learning_rate": 2.0396622889094984e-06,
"loss": 0.5232,
"step": 3299
},
{
"epoch": 0.71,
"grad_norm": 0.16923469305038452,
"learning_rate": 2.036851178609423e-06,
"loss": 0.5091,
"step": 3300
},
{
"epoch": 0.71,
"grad_norm": 0.18054278194904327,
"learning_rate": 2.0340415112292065e-06,
"loss": 0.5236,
"step": 3301
},
{
"epoch": 0.71,
"grad_norm": 0.18616452813148499,
"learning_rate": 2.0312332881370294e-06,
"loss": 0.486,
"step": 3302
},
{
"epoch": 0.71,
"grad_norm": 0.13819892704486847,
"learning_rate": 2.0284265107003715e-06,
"loss": 0.5546,
"step": 3303
},
{
"epoch": 0.71,
"grad_norm": 0.17926816642284393,
"learning_rate": 2.0256211802860044e-06,
"loss": 0.5188,
"step": 3304
},
{
"epoch": 0.71,
"grad_norm": 0.17235776782035828,
"learning_rate": 2.0228172982599974e-06,
"loss": 0.5168,
"step": 3305
},
{
"epoch": 0.71,
"grad_norm": 0.1339789479970932,
"learning_rate": 2.0200148659877185e-06,
"loss": 0.5189,
"step": 3306
},
{
"epoch": 0.71,
"grad_norm": 0.1831931173801422,
"learning_rate": 2.017213884833821e-06,
"loss": 0.5357,
"step": 3307
},
{
"epoch": 0.71,
"grad_norm": 0.1411074548959732,
"learning_rate": 2.014414356162258e-06,
"loss": 0.508,
"step": 3308
},
{
"epoch": 0.71,
"grad_norm": 0.17658570408821106,
"learning_rate": 2.0116162813362742e-06,
"loss": 0.4947,
"step": 3309
},
{
"epoch": 0.71,
"grad_norm": 0.15399529039859772,
"learning_rate": 2.0088196617184065e-06,
"loss": 0.4912,
"step": 3310
},
{
"epoch": 0.71,
"grad_norm": 0.14932404458522797,
"learning_rate": 2.0060244986704834e-06,
"loss": 0.5249,
"step": 3311
},
{
"epoch": 0.71,
"grad_norm": 0.1162419468164444,
"learning_rate": 2.00323079355362e-06,
"loss": 0.4978,
"step": 3312
},
{
"epoch": 0.71,
"grad_norm": 0.16213734447956085,
"learning_rate": 2.000438547728226e-06,
"loss": 0.5094,
"step": 3313
},
{
"epoch": 0.71,
"grad_norm": 0.1504596322774887,
"learning_rate": 1.997647762554e-06,
"loss": 0.4843,
"step": 3314
},
{
"epoch": 0.71,
"grad_norm": 0.14448657631874084,
"learning_rate": 1.994858439389929e-06,
"loss": 0.5749,
"step": 3315
},
{
"epoch": 0.71,
"grad_norm": 0.16685092449188232,
"learning_rate": 1.992070579594288e-06,
"loss": 0.4815,
"step": 3316
},
{
"epoch": 0.71,
"grad_norm": 0.17471322417259216,
"learning_rate": 1.9892841845246357e-06,
"loss": 0.5098,
"step": 3317
},
{
"epoch": 0.71,
"grad_norm": 0.16297274827957153,
"learning_rate": 1.9864992555378256e-06,
"loss": 0.5032,
"step": 3318
},
{
"epoch": 0.71,
"grad_norm": 0.15113520622253418,
"learning_rate": 1.983715793989987e-06,
"loss": 0.5265,
"step": 3319
},
{
"epoch": 0.72,
"grad_norm": 0.12620225548744202,
"learning_rate": 1.9809338012365438e-06,
"loss": 0.4534,
"step": 3320
},
{
"epoch": 0.72,
"grad_norm": 0.16422003507614136,
"learning_rate": 1.9781532786322005e-06,
"loss": 0.5404,
"step": 3321
},
{
"epoch": 0.72,
"grad_norm": 0.19491100311279297,
"learning_rate": 1.9753742275309456e-06,
"loss": 0.5811,
"step": 3322
},
{
"epoch": 0.72,
"grad_norm": 0.21426242589950562,
"learning_rate": 1.9725966492860536e-06,
"loss": 0.5046,
"step": 3323
},
{
"epoch": 0.72,
"grad_norm": 0.17637012898921967,
"learning_rate": 1.9698205452500772e-06,
"loss": 0.5513,
"step": 3324
},
{
"epoch": 0.72,
"grad_norm": 0.14277128875255585,
"learning_rate": 1.9670459167748552e-06,
"loss": 0.5306,
"step": 3325
},
{
"epoch": 0.72,
"grad_norm": 0.1377527266740799,
"learning_rate": 1.9642727652115056e-06,
"loss": 0.4497,
"step": 3326
},
{
"epoch": 0.72,
"grad_norm": 0.1276976764202118,
"learning_rate": 1.9615010919104296e-06,
"loss": 0.5035,
"step": 3327
},
{
"epoch": 0.72,
"grad_norm": 0.13921838998794556,
"learning_rate": 1.9587308982213077e-06,
"loss": 0.5545,
"step": 3328
},
{
"epoch": 0.72,
"grad_norm": 0.16659273207187653,
"learning_rate": 1.9559621854930968e-06,
"loss": 0.541,
"step": 3329
},
{
"epoch": 0.72,
"grad_norm": 0.17996352910995483,
"learning_rate": 1.953194955074038e-06,
"loss": 0.5388,
"step": 3330
},
{
"epoch": 0.72,
"grad_norm": 0.15658895671367645,
"learning_rate": 1.9504292083116442e-06,
"loss": 0.4891,
"step": 3331
},
{
"epoch": 0.72,
"grad_norm": 0.25563114881515503,
"learning_rate": 1.9476649465527116e-06,
"loss": 0.4923,
"step": 3332
},
{
"epoch": 0.72,
"grad_norm": 0.12000484019517899,
"learning_rate": 1.94490217114331e-06,
"loss": 0.464,
"step": 3333
},
{
"epoch": 0.72,
"grad_norm": 0.1276124268770218,
"learning_rate": 1.942140883428788e-06,
"loss": 0.4568,
"step": 3334
},
{
"epoch": 0.72,
"grad_norm": 0.1600504219532013,
"learning_rate": 1.939381084753769e-06,
"loss": 0.5439,
"step": 3335
},
{
"epoch": 0.72,
"grad_norm": 0.135470911860466,
"learning_rate": 1.936622776462147e-06,
"loss": 0.518,
"step": 3336
},
{
"epoch": 0.72,
"grad_norm": 0.15857979655265808,
"learning_rate": 1.933865959897096e-06,
"loss": 0.5453,
"step": 3337
},
{
"epoch": 0.72,
"grad_norm": 0.17830796539783478,
"learning_rate": 1.931110636401062e-06,
"loss": 0.5632,
"step": 3338
},
{
"epoch": 0.72,
"grad_norm": 0.1584271937608719,
"learning_rate": 1.9283568073157592e-06,
"loss": 0.5207,
"step": 3339
},
{
"epoch": 0.72,
"grad_norm": 0.1628987342119217,
"learning_rate": 1.925604473982185e-06,
"loss": 0.5068,
"step": 3340
},
{
"epoch": 0.72,
"grad_norm": 0.18597134947776794,
"learning_rate": 1.922853637740596e-06,
"loss": 0.532,
"step": 3341
},
{
"epoch": 0.72,
"grad_norm": 0.18993283808231354,
"learning_rate": 1.9201042999305276e-06,
"loss": 0.5386,
"step": 3342
},
{
"epoch": 0.72,
"grad_norm": 0.13225135207176208,
"learning_rate": 1.9173564618907843e-06,
"loss": 0.5157,
"step": 3343
},
{
"epoch": 0.72,
"grad_norm": 0.1662367582321167,
"learning_rate": 1.914610124959437e-06,
"loss": 0.5148,
"step": 3344
},
{
"epoch": 0.72,
"grad_norm": 0.1778230369091034,
"learning_rate": 1.9118652904738276e-06,
"loss": 0.4604,
"step": 3345
},
{
"epoch": 0.72,
"grad_norm": 0.17423133552074432,
"learning_rate": 1.9091219597705694e-06,
"loss": 0.5607,
"step": 3346
},
{
"epoch": 0.72,
"grad_norm": 0.15368233621120453,
"learning_rate": 1.9063801341855392e-06,
"loss": 0.511,
"step": 3347
},
{
"epoch": 0.72,
"grad_norm": 0.18018236756324768,
"learning_rate": 1.9036398150538842e-06,
"loss": 0.5437,
"step": 3348
},
{
"epoch": 0.72,
"grad_norm": 0.15523254871368408,
"learning_rate": 1.9009010037100133e-06,
"loss": 0.5262,
"step": 3349
},
{
"epoch": 0.72,
"grad_norm": 0.1918708235025406,
"learning_rate": 1.898163701487607e-06,
"loss": 0.5454,
"step": 3350
},
{
"epoch": 0.72,
"grad_norm": 0.16382472217082977,
"learning_rate": 1.8954279097196032e-06,
"loss": 0.5145,
"step": 3351
},
{
"epoch": 0.72,
"grad_norm": 0.1837274134159088,
"learning_rate": 1.8926936297382148e-06,
"loss": 0.5496,
"step": 3352
},
{
"epoch": 0.72,
"grad_norm": 0.15243403613567352,
"learning_rate": 1.8899608628749116e-06,
"loss": 0.499,
"step": 3353
},
{
"epoch": 0.72,
"grad_norm": 0.22877193987369537,
"learning_rate": 1.8872296104604255e-06,
"loss": 0.5414,
"step": 3354
},
{
"epoch": 0.72,
"grad_norm": 0.14050383865833282,
"learning_rate": 1.8844998738247562e-06,
"loss": 0.5615,
"step": 3355
},
{
"epoch": 0.72,
"grad_norm": 0.18556596338748932,
"learning_rate": 1.8817716542971593e-06,
"loss": 0.5232,
"step": 3356
},
{
"epoch": 0.72,
"grad_norm": 0.16367103159427643,
"learning_rate": 1.8790449532061556e-06,
"loss": 0.5193,
"step": 3357
},
{
"epoch": 0.72,
"grad_norm": 0.18238645792007446,
"learning_rate": 1.8763197718795262e-06,
"loss": 0.5631,
"step": 3358
},
{
"epoch": 0.72,
"grad_norm": 0.16355136036872864,
"learning_rate": 1.8735961116443118e-06,
"loss": 0.5209,
"step": 3359
},
{
"epoch": 0.72,
"grad_norm": 0.13203002512454987,
"learning_rate": 1.8708739738268133e-06,
"loss": 0.5164,
"step": 3360
},
{
"epoch": 0.72,
"grad_norm": 0.14900876581668854,
"learning_rate": 1.8681533597525859e-06,
"loss": 0.5436,
"step": 3361
},
{
"epoch": 0.72,
"grad_norm": 0.16481418907642365,
"learning_rate": 1.865434270746449e-06,
"loss": 0.4923,
"step": 3362
},
{
"epoch": 0.72,
"grad_norm": 0.1458396166563034,
"learning_rate": 1.8627167081324732e-06,
"loss": 0.4808,
"step": 3363
},
{
"epoch": 0.72,
"grad_norm": 0.15053486824035645,
"learning_rate": 1.8600006732339892e-06,
"loss": 0.4947,
"step": 3364
},
{
"epoch": 0.72,
"grad_norm": 0.1663227528333664,
"learning_rate": 1.8572861673735886e-06,
"loss": 0.5284,
"step": 3365
},
{
"epoch": 0.73,
"grad_norm": 0.17765146493911743,
"learning_rate": 1.8545731918731074e-06,
"loss": 0.5479,
"step": 3366
},
{
"epoch": 0.73,
"grad_norm": 0.17067447304725647,
"learning_rate": 1.8518617480536472e-06,
"loss": 0.5336,
"step": 3367
},
{
"epoch": 0.73,
"grad_norm": 0.15725915133953094,
"learning_rate": 1.8491518372355538e-06,
"loss": 0.5474,
"step": 3368
},
{
"epoch": 0.73,
"grad_norm": 0.17181500792503357,
"learning_rate": 1.8464434607384345e-06,
"loss": 0.541,
"step": 3369
},
{
"epoch": 0.73,
"grad_norm": 0.15348359942436218,
"learning_rate": 1.8437366198811463e-06,
"loss": 0.4621,
"step": 3370
},
{
"epoch": 0.73,
"grad_norm": 0.1679931879043579,
"learning_rate": 1.8410313159817982e-06,
"loss": 0.4791,
"step": 3371
},
{
"epoch": 0.73,
"grad_norm": 0.14566868543624878,
"learning_rate": 1.838327550357753e-06,
"loss": 0.4671,
"step": 3372
},
{
"epoch": 0.73,
"grad_norm": 0.12078259885311127,
"learning_rate": 1.83562532432562e-06,
"loss": 0.5072,
"step": 3373
},
{
"epoch": 0.73,
"grad_norm": 0.1410682201385498,
"learning_rate": 1.8329246392012622e-06,
"loss": 0.5099,
"step": 3374
},
{
"epoch": 0.73,
"grad_norm": 0.1579367220401764,
"learning_rate": 1.8302254962997934e-06,
"loss": 0.5076,
"step": 3375
},
{
"epoch": 0.73,
"grad_norm": 0.13609954714775085,
"learning_rate": 1.8275278969355714e-06,
"loss": 0.4894,
"step": 3376
},
{
"epoch": 0.73,
"grad_norm": 0.14277243614196777,
"learning_rate": 1.8248318424222071e-06,
"loss": 0.5272,
"step": 3377
},
{
"epoch": 0.73,
"grad_norm": 0.20254714787006378,
"learning_rate": 1.8221373340725568e-06,
"loss": 0.5519,
"step": 3378
},
{
"epoch": 0.73,
"grad_norm": 0.14397798478603363,
"learning_rate": 1.8194443731987254e-06,
"loss": 0.56,
"step": 3379
},
{
"epoch": 0.73,
"grad_norm": 0.1646426022052765,
"learning_rate": 1.8167529611120648e-06,
"loss": 0.5418,
"step": 3380
},
{
"epoch": 0.73,
"grad_norm": 0.15328004956245422,
"learning_rate": 1.8140630991231683e-06,
"loss": 0.515,
"step": 3381
},
{
"epoch": 0.73,
"grad_norm": 0.14614619314670563,
"learning_rate": 1.811374788541878e-06,
"loss": 0.461,
"step": 3382
},
{
"epoch": 0.73,
"grad_norm": 0.14326471090316772,
"learning_rate": 1.808688030677281e-06,
"loss": 0.5391,
"step": 3383
},
{
"epoch": 0.73,
"grad_norm": 0.16172616183757782,
"learning_rate": 1.8060028268377088e-06,
"loss": 0.4593,
"step": 3384
},
{
"epoch": 0.73,
"grad_norm": 0.16830769181251526,
"learning_rate": 1.8033191783307309e-06,
"loss": 0.5275,
"step": 3385
},
{
"epoch": 0.73,
"grad_norm": 0.16044098138809204,
"learning_rate": 1.8006370864631644e-06,
"loss": 0.4947,
"step": 3386
},
{
"epoch": 0.73,
"grad_norm": 0.16456788778305054,
"learning_rate": 1.7979565525410691e-06,
"loss": 0.5459,
"step": 3387
},
{
"epoch": 0.73,
"grad_norm": 0.18591398000717163,
"learning_rate": 1.7952775778697418e-06,
"loss": 0.5412,
"step": 3388
},
{
"epoch": 0.73,
"grad_norm": 0.16051456332206726,
"learning_rate": 1.7926001637537222e-06,
"loss": 0.5081,
"step": 3389
},
{
"epoch": 0.73,
"grad_norm": 0.1629684865474701,
"learning_rate": 1.7899243114967918e-06,
"loss": 0.5292,
"step": 3390
},
{
"epoch": 0.73,
"grad_norm": 0.171781525015831,
"learning_rate": 1.7872500224019696e-06,
"loss": 0.5031,
"step": 3391
},
{
"epoch": 0.73,
"grad_norm": 0.14067816734313965,
"learning_rate": 1.7845772977715148e-06,
"loss": 0.5218,
"step": 3392
},
{
"epoch": 0.73,
"grad_norm": 0.1485803723335266,
"learning_rate": 1.7819061389069208e-06,
"loss": 0.5542,
"step": 3393
},
{
"epoch": 0.73,
"grad_norm": 0.13566631078720093,
"learning_rate": 1.7792365471089252e-06,
"loss": 0.4824,
"step": 3394
},
{
"epoch": 0.73,
"grad_norm": 0.17010356485843658,
"learning_rate": 1.7765685236774937e-06,
"loss": 0.4645,
"step": 3395
},
{
"epoch": 0.73,
"grad_norm": 0.16269443929195404,
"learning_rate": 1.773902069911838e-06,
"loss": 0.4747,
"step": 3396
},
{
"epoch": 0.73,
"grad_norm": 0.20955312252044678,
"learning_rate": 1.7712371871104012e-06,
"loss": 0.4571,
"step": 3397
},
{
"epoch": 0.73,
"grad_norm": 0.13663814961910248,
"learning_rate": 1.7685738765708576e-06,
"loss": 0.4574,
"step": 3398
},
{
"epoch": 0.73,
"grad_norm": 0.15181677043437958,
"learning_rate": 1.765912139590123e-06,
"loss": 0.5594,
"step": 3399
},
{
"epoch": 0.73,
"grad_norm": 0.15845806896686554,
"learning_rate": 1.7632519774643391e-06,
"loss": 0.4867,
"step": 3400
},
{
"epoch": 0.73,
"grad_norm": 0.18000967800617218,
"learning_rate": 1.760593391488888e-06,
"loss": 0.486,
"step": 3401
},
{
"epoch": 0.73,
"grad_norm": 0.2162676602602005,
"learning_rate": 1.7579363829583794e-06,
"loss": 0.49,
"step": 3402
},
{
"epoch": 0.73,
"grad_norm": 0.1404118835926056,
"learning_rate": 1.7552809531666582e-06,
"loss": 0.4929,
"step": 3403
},
{
"epoch": 0.73,
"grad_norm": 0.1907334327697754,
"learning_rate": 1.7526271034067993e-06,
"loss": 0.5793,
"step": 3404
},
{
"epoch": 0.73,
"grad_norm": 0.16548538208007812,
"learning_rate": 1.749974834971106e-06,
"loss": 0.5448,
"step": 3405
},
{
"epoch": 0.73,
"grad_norm": 0.12368584424257278,
"learning_rate": 1.7473241491511139e-06,
"loss": 0.4833,
"step": 3406
},
{
"epoch": 0.73,
"grad_norm": 0.1966637670993805,
"learning_rate": 1.7446750472375879e-06,
"loss": 0.5532,
"step": 3407
},
{
"epoch": 0.73,
"grad_norm": 0.13835598528385162,
"learning_rate": 1.7420275305205214e-06,
"loss": 0.5279,
"step": 3408
},
{
"epoch": 0.73,
"grad_norm": 0.15932750701904297,
"learning_rate": 1.7393816002891368e-06,
"loss": 0.5535,
"step": 3409
},
{
"epoch": 0.73,
"grad_norm": 0.18594208359718323,
"learning_rate": 1.7367372578318797e-06,
"loss": 0.4495,
"step": 3410
},
{
"epoch": 0.73,
"grad_norm": 0.17896361649036407,
"learning_rate": 1.7340945044364293e-06,
"loss": 0.5242,
"step": 3411
},
{
"epoch": 0.73,
"grad_norm": 0.13597969710826874,
"learning_rate": 1.7314533413896833e-06,
"loss": 0.443,
"step": 3412
},
{
"epoch": 0.74,
"grad_norm": 0.15737301111221313,
"learning_rate": 1.7288137699777714e-06,
"loss": 0.5596,
"step": 3413
},
{
"epoch": 0.74,
"grad_norm": 0.14145611226558685,
"learning_rate": 1.7261757914860456e-06,
"loss": 0.5109,
"step": 3414
},
{
"epoch": 0.74,
"grad_norm": 0.17835848033428192,
"learning_rate": 1.7235394071990824e-06,
"loss": 0.5036,
"step": 3415
},
{
"epoch": 0.74,
"grad_norm": 0.21317198872566223,
"learning_rate": 1.720904618400684e-06,
"loss": 0.473,
"step": 3416
},
{
"epoch": 0.74,
"grad_norm": 0.13540047407150269,
"learning_rate": 1.7182714263738692e-06,
"loss": 0.538,
"step": 3417
},
{
"epoch": 0.74,
"grad_norm": 0.21162466704845428,
"learning_rate": 1.7156398324008871e-06,
"loss": 0.5771,
"step": 3418
},
{
"epoch": 0.74,
"grad_norm": 0.16375568509101868,
"learning_rate": 1.7130098377632065e-06,
"loss": 0.5353,
"step": 3419
},
{
"epoch": 0.74,
"grad_norm": 0.15535147488117218,
"learning_rate": 1.7103814437415105e-06,
"loss": 0.4993,
"step": 3420
},
{
"epoch": 0.74,
"grad_norm": 0.17422151565551758,
"learning_rate": 1.7077546516157156e-06,
"loss": 0.5527,
"step": 3421
},
{
"epoch": 0.74,
"grad_norm": 0.15542204678058624,
"learning_rate": 1.7051294626649462e-06,
"loss": 0.5521,
"step": 3422
},
{
"epoch": 0.74,
"grad_norm": 0.16863486170768738,
"learning_rate": 1.702505878167553e-06,
"loss": 0.5245,
"step": 3423
},
{
"epoch": 0.74,
"grad_norm": 0.17205984890460968,
"learning_rate": 1.6998838994011041e-06,
"loss": 0.5189,
"step": 3424
},
{
"epoch": 0.74,
"grad_norm": 0.15427836775779724,
"learning_rate": 1.6972635276423815e-06,
"loss": 0.5309,
"step": 3425
},
{
"epoch": 0.74,
"grad_norm": 0.12143069505691528,
"learning_rate": 1.6946447641673907e-06,
"loss": 0.4993,
"step": 3426
},
{
"epoch": 0.74,
"grad_norm": 0.15743811428546906,
"learning_rate": 1.6920276102513512e-06,
"loss": 0.4693,
"step": 3427
},
{
"epoch": 0.74,
"grad_norm": 0.15306471288204193,
"learning_rate": 1.6894120671686986e-06,
"loss": 0.5164,
"step": 3428
},
{
"epoch": 0.74,
"grad_norm": 0.21616849303245544,
"learning_rate": 1.6867981361930864e-06,
"loss": 0.5525,
"step": 3429
},
{
"epoch": 0.74,
"grad_norm": 0.1986081451177597,
"learning_rate": 1.6841858185973775e-06,
"loss": 0.5335,
"step": 3430
},
{
"epoch": 0.74,
"grad_norm": 0.1559258997440338,
"learning_rate": 1.681575115653656e-06,
"loss": 0.4944,
"step": 3431
},
{
"epoch": 0.74,
"grad_norm": 0.12250448018312454,
"learning_rate": 1.6789660286332132e-06,
"loss": 0.5096,
"step": 3432
},
{
"epoch": 0.74,
"grad_norm": 0.3296593129634857,
"learning_rate": 1.6763585588065579e-06,
"loss": 0.5291,
"step": 3433
},
{
"epoch": 0.74,
"grad_norm": 0.15359769761562347,
"learning_rate": 1.6737527074434135e-06,
"loss": 0.4591,
"step": 3434
},
{
"epoch": 0.74,
"grad_norm": 0.15591566264629364,
"learning_rate": 1.6711484758127088e-06,
"loss": 0.524,
"step": 3435
},
{
"epoch": 0.74,
"grad_norm": 0.16285593807697296,
"learning_rate": 1.6685458651825892e-06,
"loss": 0.4952,
"step": 3436
},
{
"epoch": 0.74,
"grad_norm": 0.1407860666513443,
"learning_rate": 1.6659448768204062e-06,
"loss": 0.436,
"step": 3437
},
{
"epoch": 0.74,
"grad_norm": 0.15365861356258392,
"learning_rate": 1.6633455119927256e-06,
"loss": 0.5039,
"step": 3438
},
{
"epoch": 0.74,
"grad_norm": 0.18231172859668732,
"learning_rate": 1.6607477719653198e-06,
"loss": 0.5312,
"step": 3439
},
{
"epoch": 0.74,
"grad_norm": 0.1712518185377121,
"learning_rate": 1.658151658003172e-06,
"loss": 0.5649,
"step": 3440
},
{
"epoch": 0.74,
"grad_norm": 0.19198983907699585,
"learning_rate": 1.6555571713704743e-06,
"loss": 0.5381,
"step": 3441
},
{
"epoch": 0.74,
"grad_norm": 0.14253763854503632,
"learning_rate": 1.6529643133306212e-06,
"loss": 0.545,
"step": 3442
},
{
"epoch": 0.74,
"grad_norm": 0.16237348318099976,
"learning_rate": 1.6503730851462208e-06,
"loss": 0.5184,
"step": 3443
},
{
"epoch": 0.74,
"grad_norm": 0.1761130690574646,
"learning_rate": 1.647783488079081e-06,
"loss": 0.547,
"step": 3444
},
{
"epoch": 0.74,
"grad_norm": 0.17866788804531097,
"learning_rate": 1.6451955233902206e-06,
"loss": 0.5351,
"step": 3445
},
{
"epoch": 0.74,
"grad_norm": 0.176165372133255,
"learning_rate": 1.6426091923398619e-06,
"loss": 0.4789,
"step": 3446
},
{
"epoch": 0.74,
"grad_norm": 0.21256986260414124,
"learning_rate": 1.6400244961874311e-06,
"loss": 0.5431,
"step": 3447
},
{
"epoch": 0.74,
"grad_norm": 0.17698679864406586,
"learning_rate": 1.6374414361915613e-06,
"loss": 0.515,
"step": 3448
},
{
"epoch": 0.74,
"grad_norm": 0.13463236391544342,
"learning_rate": 1.6348600136100817e-06,
"loss": 0.5694,
"step": 3449
},
{
"epoch": 0.74,
"grad_norm": 0.15754647552967072,
"learning_rate": 1.6322802297000306e-06,
"loss": 0.5126,
"step": 3450
},
{
"epoch": 0.74,
"grad_norm": 0.19439859688282013,
"learning_rate": 1.6297020857176466e-06,
"loss": 0.5368,
"step": 3451
},
{
"epoch": 0.74,
"grad_norm": 0.1266242265701294,
"learning_rate": 1.6271255829183702e-06,
"loss": 0.502,
"step": 3452
},
{
"epoch": 0.74,
"grad_norm": 0.18297268450260162,
"learning_rate": 1.6245507225568425e-06,
"loss": 0.4904,
"step": 3453
},
{
"epoch": 0.74,
"grad_norm": 0.1342281848192215,
"learning_rate": 1.6219775058869019e-06,
"loss": 0.4823,
"step": 3454
},
{
"epoch": 0.74,
"grad_norm": 0.17030228674411774,
"learning_rate": 1.6194059341615908e-06,
"loss": 0.5196,
"step": 3455
},
{
"epoch": 0.74,
"grad_norm": 0.1725773811340332,
"learning_rate": 1.6168360086331498e-06,
"loss": 0.4785,
"step": 3456
},
{
"epoch": 0.74,
"grad_norm": 0.16261161863803864,
"learning_rate": 1.614267730553013e-06,
"loss": 0.481,
"step": 3457
},
{
"epoch": 0.74,
"grad_norm": 0.16986827552318573,
"learning_rate": 1.6117011011718188e-06,
"loss": 0.4874,
"step": 3458
},
{
"epoch": 0.75,
"grad_norm": 0.148764505982399,
"learning_rate": 1.6091361217393992e-06,
"loss": 0.5044,
"step": 3459
},
{
"epoch": 0.75,
"grad_norm": 0.1262829303741455,
"learning_rate": 1.6065727935047837e-06,
"loss": 0.5185,
"step": 3460
},
{
"epoch": 0.75,
"grad_norm": 0.1936028003692627,
"learning_rate": 1.6040111177161994e-06,
"loss": 0.5645,
"step": 3461
},
{
"epoch": 0.75,
"grad_norm": 0.1676550656557083,
"learning_rate": 1.6014510956210632e-06,
"loss": 0.5394,
"step": 3462
},
{
"epoch": 0.75,
"grad_norm": 0.19447743892669678,
"learning_rate": 1.5988927284659921e-06,
"loss": 0.5471,
"step": 3463
},
{
"epoch": 0.75,
"grad_norm": 0.14620442688465118,
"learning_rate": 1.5963360174967956e-06,
"loss": 0.493,
"step": 3464
},
{
"epoch": 0.75,
"grad_norm": 0.1667238175868988,
"learning_rate": 1.593780963958479e-06,
"loss": 0.5172,
"step": 3465
},
{
"epoch": 0.75,
"grad_norm": 0.14564719796180725,
"learning_rate": 1.5912275690952339e-06,
"loss": 0.5031,
"step": 3466
},
{
"epoch": 0.75,
"grad_norm": 0.17484711110591888,
"learning_rate": 1.5886758341504506e-06,
"loss": 0.4841,
"step": 3467
},
{
"epoch": 0.75,
"grad_norm": 0.17965911328792572,
"learning_rate": 1.5861257603667106e-06,
"loss": 0.5354,
"step": 3468
},
{
"epoch": 0.75,
"grad_norm": 0.16746492683887482,
"learning_rate": 1.5835773489857813e-06,
"loss": 0.5087,
"step": 3469
},
{
"epoch": 0.75,
"grad_norm": 0.15599916875362396,
"learning_rate": 1.581030601248626e-06,
"loss": 0.5562,
"step": 3470
},
{
"epoch": 0.75,
"grad_norm": 0.13977332413196564,
"learning_rate": 1.5784855183953956e-06,
"loss": 0.5003,
"step": 3471
},
{
"epoch": 0.75,
"grad_norm": 0.19692641496658325,
"learning_rate": 1.5759421016654314e-06,
"loss": 0.4618,
"step": 3472
},
{
"epoch": 0.75,
"grad_norm": 0.19811460375785828,
"learning_rate": 1.5734003522972635e-06,
"loss": 0.4771,
"step": 3473
},
{
"epoch": 0.75,
"grad_norm": 0.15085352957248688,
"learning_rate": 1.570860271528607e-06,
"loss": 0.5023,
"step": 3474
},
{
"epoch": 0.75,
"grad_norm": 0.16862472891807556,
"learning_rate": 1.5683218605963686e-06,
"loss": 0.5323,
"step": 3475
},
{
"epoch": 0.75,
"grad_norm": 0.14913085103034973,
"learning_rate": 1.5657851207366359e-06,
"loss": 0.5062,
"step": 3476
},
{
"epoch": 0.75,
"grad_norm": 0.17679521441459656,
"learning_rate": 1.5632500531846916e-06,
"loss": 0.4542,
"step": 3477
},
{
"epoch": 0.75,
"grad_norm": 0.14363497495651245,
"learning_rate": 1.5607166591749995e-06,
"loss": 0.5322,
"step": 3478
},
{
"epoch": 0.75,
"grad_norm": 0.1939202845096588,
"learning_rate": 1.5581849399412047e-06,
"loss": 0.5045,
"step": 3479
},
{
"epoch": 0.75,
"grad_norm": 0.15432246029376984,
"learning_rate": 1.555654896716144e-06,
"loss": 0.5493,
"step": 3480
},
{
"epoch": 0.75,
"grad_norm": 0.14659973978996277,
"learning_rate": 1.55312653073183e-06,
"loss": 0.5053,
"step": 3481
},
{
"epoch": 0.75,
"grad_norm": 0.14200441539287567,
"learning_rate": 1.5505998432194658e-06,
"loss": 0.4921,
"step": 3482
},
{
"epoch": 0.75,
"grad_norm": 0.16654643416404724,
"learning_rate": 1.5480748354094332e-06,
"loss": 0.4844,
"step": 3483
},
{
"epoch": 0.75,
"grad_norm": 0.21012644469738007,
"learning_rate": 1.5455515085312984e-06,
"loss": 0.5075,
"step": 3484
},
{
"epoch": 0.75,
"grad_norm": 0.20673668384552002,
"learning_rate": 1.543029863813808e-06,
"loss": 0.5155,
"step": 3485
},
{
"epoch": 0.75,
"grad_norm": 0.16482393443584442,
"learning_rate": 1.5405099024848874e-06,
"loss": 0.4767,
"step": 3486
},
{
"epoch": 0.75,
"grad_norm": 0.14894434809684753,
"learning_rate": 1.5379916257716448e-06,
"loss": 0.5139,
"step": 3487
},
{
"epoch": 0.75,
"grad_norm": 0.14717522263526917,
"learning_rate": 1.5354750349003694e-06,
"loss": 0.5422,
"step": 3488
},
{
"epoch": 0.75,
"grad_norm": 0.1304522007703781,
"learning_rate": 1.5329601310965225e-06,
"loss": 0.5312,
"step": 3489
},
{
"epoch": 0.75,
"grad_norm": 0.20385202765464783,
"learning_rate": 1.5304469155847556e-06,
"loss": 0.5567,
"step": 3490
},
{
"epoch": 0.75,
"grad_norm": 0.13958947360515594,
"learning_rate": 1.527935389588886e-06,
"loss": 0.5514,
"step": 3491
},
{
"epoch": 0.75,
"grad_norm": 0.17612220346927643,
"learning_rate": 1.5254255543319168e-06,
"loss": 0.4965,
"step": 3492
},
{
"epoch": 0.75,
"grad_norm": 0.1380666047334671,
"learning_rate": 1.5229174110360222e-06,
"loss": 0.5664,
"step": 3493
},
{
"epoch": 0.75,
"grad_norm": 0.14796659350395203,
"learning_rate": 1.5204109609225553e-06,
"loss": 0.4855,
"step": 3494
},
{
"epoch": 0.75,
"grad_norm": 0.17148888111114502,
"learning_rate": 1.5179062052120459e-06,
"loss": 0.4734,
"step": 3495
},
{
"epoch": 0.75,
"grad_norm": 0.15438951551914215,
"learning_rate": 1.5154031451241952e-06,
"loss": 0.5619,
"step": 3496
},
{
"epoch": 0.75,
"grad_norm": 0.18706923723220825,
"learning_rate": 1.5129017818778835e-06,
"loss": 0.5614,
"step": 3497
},
{
"epoch": 0.75,
"grad_norm": 0.15529923141002655,
"learning_rate": 1.5104021166911582e-06,
"loss": 0.5682,
"step": 3498
},
{
"epoch": 0.75,
"grad_norm": 0.21242637932300568,
"learning_rate": 1.5079041507812454e-06,
"loss": 0.5401,
"step": 3499
},
{
"epoch": 0.75,
"grad_norm": 0.18680711090564728,
"learning_rate": 1.5054078853645432e-06,
"loss": 0.5004,
"step": 3500
},
{
"epoch": 0.75,
"grad_norm": 0.1526576429605484,
"learning_rate": 1.5029133216566172e-06,
"loss": 0.4787,
"step": 3501
},
{
"epoch": 0.75,
"grad_norm": 0.16828754544258118,
"learning_rate": 1.5004204608722088e-06,
"loss": 0.5431,
"step": 3502
},
{
"epoch": 0.75,
"grad_norm": 0.12067432701587677,
"learning_rate": 1.4979293042252291e-06,
"loss": 0.475,
"step": 3503
},
{
"epoch": 0.75,
"grad_norm": 0.2365456521511078,
"learning_rate": 1.495439852928759e-06,
"loss": 0.5043,
"step": 3504
},
{
"epoch": 0.76,
"grad_norm": 0.15215608477592468,
"learning_rate": 1.492952108195051e-06,
"loss": 0.5572,
"step": 3505
},
{
"epoch": 0.76,
"grad_norm": 0.12552374601364136,
"learning_rate": 1.4904660712355207e-06,
"loss": 0.4765,
"step": 3506
},
{
"epoch": 0.76,
"grad_norm": 0.14761190116405487,
"learning_rate": 1.4879817432607573e-06,
"loss": 0.5246,
"step": 3507
},
{
"epoch": 0.76,
"grad_norm": 0.18243740499019623,
"learning_rate": 1.4854991254805179e-06,
"loss": 0.5586,
"step": 3508
},
{
"epoch": 0.76,
"grad_norm": 0.14932124316692352,
"learning_rate": 1.4830182191037246e-06,
"loss": 0.5113,
"step": 3509
},
{
"epoch": 0.76,
"grad_norm": 0.14290283620357513,
"learning_rate": 1.4805390253384683e-06,
"loss": 0.5141,
"step": 3510
},
{
"epoch": 0.76,
"grad_norm": 0.1591915637254715,
"learning_rate": 1.4780615453920016e-06,
"loss": 0.5043,
"step": 3511
},
{
"epoch": 0.76,
"grad_norm": 0.15741455554962158,
"learning_rate": 1.4755857804707485e-06,
"loss": 0.5195,
"step": 3512
},
{
"epoch": 0.76,
"grad_norm": 0.16539999842643738,
"learning_rate": 1.4731117317802923e-06,
"loss": 0.5353,
"step": 3513
},
{
"epoch": 0.76,
"grad_norm": 0.13506761193275452,
"learning_rate": 1.4706394005253838e-06,
"loss": 0.5446,
"step": 3514
},
{
"epoch": 0.76,
"grad_norm": 0.1319400519132614,
"learning_rate": 1.4681687879099376e-06,
"loss": 0.5075,
"step": 3515
},
{
"epoch": 0.76,
"grad_norm": 0.1385585367679596,
"learning_rate": 1.465699895137031e-06,
"loss": 0.5429,
"step": 3516
},
{
"epoch": 0.76,
"grad_norm": 0.12562334537506104,
"learning_rate": 1.463232723408904e-06,
"loss": 0.5114,
"step": 3517
},
{
"epoch": 0.76,
"grad_norm": 0.1627231389284134,
"learning_rate": 1.4607672739269552e-06,
"loss": 0.4937,
"step": 3518
},
{
"epoch": 0.76,
"grad_norm": 0.18348899483680725,
"learning_rate": 1.458303547891749e-06,
"loss": 0.5292,
"step": 3519
},
{
"epoch": 0.76,
"grad_norm": 0.17182128131389618,
"learning_rate": 1.455841546503009e-06,
"loss": 0.5041,
"step": 3520
},
{
"epoch": 0.76,
"grad_norm": 0.1899326741695404,
"learning_rate": 1.4533812709596184e-06,
"loss": 0.5299,
"step": 3521
},
{
"epoch": 0.76,
"grad_norm": 0.2223159819841385,
"learning_rate": 1.450922722459623e-06,
"loss": 0.535,
"step": 3522
},
{
"epoch": 0.76,
"grad_norm": 0.16189001500606537,
"learning_rate": 1.4484659022002208e-06,
"loss": 0.5021,
"step": 3523
},
{
"epoch": 0.76,
"grad_norm": 0.19098880887031555,
"learning_rate": 1.446010811377776e-06,
"loss": 0.5376,
"step": 3524
},
{
"epoch": 0.76,
"grad_norm": 0.19344013929367065,
"learning_rate": 1.4435574511878037e-06,
"loss": 0.5651,
"step": 3525
},
{
"epoch": 0.76,
"grad_norm": 0.13735520839691162,
"learning_rate": 1.4411058228249824e-06,
"loss": 0.5185,
"step": 3526
},
{
"epoch": 0.76,
"grad_norm": 0.15287934243679047,
"learning_rate": 1.438655927483143e-06,
"loss": 0.538,
"step": 3527
},
{
"epoch": 0.76,
"grad_norm": 0.14649856090545654,
"learning_rate": 1.4362077663552754e-06,
"loss": 0.4895,
"step": 3528
},
{
"epoch": 0.76,
"grad_norm": 0.171720489859581,
"learning_rate": 1.4337613406335244e-06,
"loss": 0.5108,
"step": 3529
},
{
"epoch": 0.76,
"grad_norm": 0.15773873031139374,
"learning_rate": 1.4313166515091863e-06,
"loss": 0.5321,
"step": 3530
},
{
"epoch": 0.76,
"grad_norm": 0.1555633246898651,
"learning_rate": 1.428873700172716e-06,
"loss": 0.533,
"step": 3531
},
{
"epoch": 0.76,
"grad_norm": 0.15259774029254913,
"learning_rate": 1.4264324878137204e-06,
"loss": 0.5034,
"step": 3532
},
{
"epoch": 0.76,
"grad_norm": 0.13174332678318024,
"learning_rate": 1.4239930156209597e-06,
"loss": 0.5052,
"step": 3533
},
{
"epoch": 0.76,
"grad_norm": 0.15307480096817017,
"learning_rate": 1.421555284782349e-06,
"loss": 0.5494,
"step": 3534
},
{
"epoch": 0.76,
"grad_norm": 0.14553479850292206,
"learning_rate": 1.4191192964849492e-06,
"loss": 0.5103,
"step": 3535
},
{
"epoch": 0.76,
"grad_norm": 0.1426243633031845,
"learning_rate": 1.4166850519149794e-06,
"loss": 0.4749,
"step": 3536
},
{
"epoch": 0.76,
"grad_norm": 0.1951218992471695,
"learning_rate": 1.4142525522578082e-06,
"loss": 0.4723,
"step": 3537
},
{
"epoch": 0.76,
"grad_norm": 0.15164697170257568,
"learning_rate": 1.41182179869795e-06,
"loss": 0.5262,
"step": 3538
},
{
"epoch": 0.76,
"grad_norm": 0.15860851109027863,
"learning_rate": 1.409392792419073e-06,
"loss": 0.5282,
"step": 3539
},
{
"epoch": 0.76,
"grad_norm": 0.1473582684993744,
"learning_rate": 1.406965534603995e-06,
"loss": 0.5385,
"step": 3540
},
{
"epoch": 0.76,
"grad_norm": 0.15201956033706665,
"learning_rate": 1.404540026434681e-06,
"loss": 0.5183,
"step": 3541
},
{
"epoch": 0.76,
"grad_norm": 0.18102842569351196,
"learning_rate": 1.4021162690922441e-06,
"loss": 0.5474,
"step": 3542
},
{
"epoch": 0.76,
"grad_norm": 0.13703973591327667,
"learning_rate": 1.3996942637569438e-06,
"loss": 0.5333,
"step": 3543
},
{
"epoch": 0.76,
"grad_norm": 0.14135409891605377,
"learning_rate": 1.397274011608189e-06,
"loss": 0.5164,
"step": 3544
},
{
"epoch": 0.76,
"grad_norm": 0.14163129031658173,
"learning_rate": 1.3948555138245295e-06,
"loss": 0.5044,
"step": 3545
},
{
"epoch": 0.76,
"grad_norm": 0.17711517214775085,
"learning_rate": 1.3924387715836706e-06,
"loss": 0.5235,
"step": 3546
},
{
"epoch": 0.76,
"grad_norm": 0.14216595888137817,
"learning_rate": 1.390023786062452e-06,
"loss": 0.4795,
"step": 3547
},
{
"epoch": 0.76,
"grad_norm": 0.16309070587158203,
"learning_rate": 1.3876105584368653e-06,
"loss": 0.5377,
"step": 3548
},
{
"epoch": 0.76,
"grad_norm": 0.1565747708082199,
"learning_rate": 1.3851990898820439e-06,
"loss": 0.5447,
"step": 3549
},
{
"epoch": 0.76,
"grad_norm": 0.16009236872196198,
"learning_rate": 1.3827893815722614e-06,
"loss": 0.5253,
"step": 3550
},
{
"epoch": 0.76,
"grad_norm": 0.14546410739421844,
"learning_rate": 1.3803814346809386e-06,
"loss": 0.5234,
"step": 3551
},
{
"epoch": 0.77,
"grad_norm": 0.16906693577766418,
"learning_rate": 1.3779752503806375e-06,
"loss": 0.5151,
"step": 3552
},
{
"epoch": 0.77,
"grad_norm": 0.16307243704795837,
"learning_rate": 1.3755708298430614e-06,
"loss": 0.4965,
"step": 3553
},
{
"epoch": 0.77,
"grad_norm": 0.17134273052215576,
"learning_rate": 1.3731681742390558e-06,
"loss": 0.4913,
"step": 3554
},
{
"epoch": 0.77,
"grad_norm": 0.1275198608636856,
"learning_rate": 1.3707672847386021e-06,
"loss": 0.4962,
"step": 3555
},
{
"epoch": 0.77,
"grad_norm": 0.14432884752750397,
"learning_rate": 1.368368162510829e-06,
"loss": 0.4881,
"step": 3556
},
{
"epoch": 0.77,
"grad_norm": 0.17812266945838928,
"learning_rate": 1.3659708087239981e-06,
"loss": 0.5165,
"step": 3557
},
{
"epoch": 0.77,
"grad_norm": 0.13962845504283905,
"learning_rate": 1.363575224545512e-06,
"loss": 0.5171,
"step": 3558
},
{
"epoch": 0.77,
"grad_norm": 0.15105730295181274,
"learning_rate": 1.3611814111419163e-06,
"loss": 0.5498,
"step": 3559
},
{
"epoch": 0.77,
"grad_norm": 0.12979727983474731,
"learning_rate": 1.3587893696788868e-06,
"loss": 0.4992,
"step": 3560
},
{
"epoch": 0.77,
"grad_norm": 0.13221748173236847,
"learning_rate": 1.3563991013212424e-06,
"loss": 0.5035,
"step": 3561
},
{
"epoch": 0.77,
"grad_norm": 0.19586306810379028,
"learning_rate": 1.3540106072329323e-06,
"loss": 0.5264,
"step": 3562
},
{
"epoch": 0.77,
"grad_norm": 0.17884129285812378,
"learning_rate": 1.3516238885770477e-06,
"loss": 0.5184,
"step": 3563
},
{
"epoch": 0.77,
"grad_norm": 0.16461104154586792,
"learning_rate": 1.349238946515813e-06,
"loss": 0.5141,
"step": 3564
},
{
"epoch": 0.77,
"grad_norm": 0.14698609709739685,
"learning_rate": 1.3468557822105864e-06,
"loss": 0.5084,
"step": 3565
},
{
"epoch": 0.77,
"grad_norm": 0.1535801738500595,
"learning_rate": 1.344474396821865e-06,
"loss": 0.5352,
"step": 3566
},
{
"epoch": 0.77,
"grad_norm": 0.20536081492900848,
"learning_rate": 1.3420947915092708e-06,
"loss": 0.5344,
"step": 3567
},
{
"epoch": 0.77,
"grad_norm": 0.1282234936952591,
"learning_rate": 1.3397169674315668e-06,
"loss": 0.5156,
"step": 3568
},
{
"epoch": 0.77,
"grad_norm": 0.16148659586906433,
"learning_rate": 1.337340925746648e-06,
"loss": 0.5496,
"step": 3569
},
{
"epoch": 0.77,
"grad_norm": 0.13853482902050018,
"learning_rate": 1.3349666676115358e-06,
"loss": 0.5359,
"step": 3570
},
{
"epoch": 0.77,
"grad_norm": 0.1656581163406372,
"learning_rate": 1.332594194182389e-06,
"loss": 0.4921,
"step": 3571
},
{
"epoch": 0.77,
"grad_norm": 0.20992939174175262,
"learning_rate": 1.3302235066144948e-06,
"loss": 0.5248,
"step": 3572
},
{
"epoch": 0.77,
"grad_norm": 0.15347984433174133,
"learning_rate": 1.3278546060622727e-06,
"loss": 0.5024,
"step": 3573
},
{
"epoch": 0.77,
"grad_norm": 0.15400084853172302,
"learning_rate": 1.3254874936792672e-06,
"loss": 0.5103,
"step": 3574
},
{
"epoch": 0.77,
"grad_norm": 0.23529481887817383,
"learning_rate": 1.3231221706181575e-06,
"loss": 0.4866,
"step": 3575
},
{
"epoch": 0.77,
"grad_norm": 0.18368123471736908,
"learning_rate": 1.3207586380307486e-06,
"loss": 0.4999,
"step": 3576
},
{
"epoch": 0.77,
"grad_norm": 0.1438288390636444,
"learning_rate": 1.318396897067975e-06,
"loss": 0.6058,
"step": 3577
},
{
"epoch": 0.77,
"grad_norm": 0.13895906507968903,
"learning_rate": 1.3160369488798984e-06,
"loss": 0.4721,
"step": 3578
},
{
"epoch": 0.77,
"grad_norm": 0.1586867719888687,
"learning_rate": 1.3136787946157055e-06,
"loss": 0.5271,
"step": 3579
},
{
"epoch": 0.77,
"grad_norm": 0.15690717101097107,
"learning_rate": 1.3113224354237113e-06,
"loss": 0.5475,
"step": 3580
},
{
"epoch": 0.77,
"grad_norm": 0.15662527084350586,
"learning_rate": 1.3089678724513589e-06,
"loss": 0.5388,
"step": 3581
},
{
"epoch": 0.77,
"grad_norm": 0.12812086939811707,
"learning_rate": 1.306615106845211e-06,
"loss": 0.4969,
"step": 3582
},
{
"epoch": 0.77,
"grad_norm": 0.14390747249126434,
"learning_rate": 1.3042641397509597e-06,
"loss": 0.4779,
"step": 3583
},
{
"epoch": 0.77,
"grad_norm": 0.1376083791255951,
"learning_rate": 1.30191497231342e-06,
"loss": 0.5654,
"step": 3584
},
{
"epoch": 0.77,
"grad_norm": 0.20095455646514893,
"learning_rate": 1.299567605676531e-06,
"loss": 0.518,
"step": 3585
},
{
"epoch": 0.77,
"grad_norm": 0.16395071148872375,
"learning_rate": 1.2972220409833552e-06,
"loss": 0.5361,
"step": 3586
},
{
"epoch": 0.77,
"grad_norm": 0.12365875393152237,
"learning_rate": 1.2948782793760745e-06,
"loss": 0.5278,
"step": 3587
},
{
"epoch": 0.77,
"grad_norm": 0.1766958236694336,
"learning_rate": 1.2925363219959958e-06,
"loss": 0.569,
"step": 3588
},
{
"epoch": 0.77,
"grad_norm": 0.15711119771003723,
"learning_rate": 1.2901961699835475e-06,
"loss": 0.542,
"step": 3589
},
{
"epoch": 0.77,
"grad_norm": 0.16060031950473785,
"learning_rate": 1.2878578244782775e-06,
"loss": 0.5658,
"step": 3590
},
{
"epoch": 0.77,
"grad_norm": 0.15911975502967834,
"learning_rate": 1.2855212866188566e-06,
"loss": 0.5181,
"step": 3591
},
{
"epoch": 0.77,
"grad_norm": 0.16181927919387817,
"learning_rate": 1.2831865575430702e-06,
"loss": 0.5686,
"step": 3592
},
{
"epoch": 0.77,
"grad_norm": 0.1953277289867401,
"learning_rate": 1.2808536383878295e-06,
"loss": 0.5062,
"step": 3593
},
{
"epoch": 0.77,
"grad_norm": 0.17257684469223022,
"learning_rate": 1.2785225302891568e-06,
"loss": 0.4755,
"step": 3594
},
{
"epoch": 0.77,
"grad_norm": 0.17315979301929474,
"learning_rate": 1.2761932343821992e-06,
"loss": 0.5166,
"step": 3595
},
{
"epoch": 0.77,
"grad_norm": 0.19010502099990845,
"learning_rate": 1.2738657518012188e-06,
"loss": 0.5653,
"step": 3596
},
{
"epoch": 0.77,
"grad_norm": 0.1522989571094513,
"learning_rate": 1.2715400836795939e-06,
"loss": 0.5601,
"step": 3597
},
{
"epoch": 0.78,
"grad_norm": 0.15732337534427643,
"learning_rate": 1.2692162311498219e-06,
"loss": 0.5467,
"step": 3598
},
{
"epoch": 0.78,
"grad_norm": 0.16109420359134674,
"learning_rate": 1.266894195343511e-06,
"loss": 0.5328,
"step": 3599
},
{
"epoch": 0.78,
"grad_norm": 0.16374173760414124,
"learning_rate": 1.2645739773913911e-06,
"loss": 0.5324,
"step": 3600
},
{
"epoch": 0.78,
"grad_norm": 0.18841396272182465,
"learning_rate": 1.2622555784232992e-06,
"loss": 0.4905,
"step": 3601
},
{
"epoch": 0.78,
"grad_norm": 0.17438913881778717,
"learning_rate": 1.259938999568196e-06,
"loss": 0.4836,
"step": 3602
},
{
"epoch": 0.78,
"grad_norm": 0.1530585139989853,
"learning_rate": 1.2576242419541502e-06,
"loss": 0.4937,
"step": 3603
},
{
"epoch": 0.78,
"grad_norm": 0.16211232542991638,
"learning_rate": 1.2553113067083417e-06,
"loss": 0.5307,
"step": 3604
},
{
"epoch": 0.78,
"grad_norm": 0.18766599893569946,
"learning_rate": 1.2530001949570686e-06,
"loss": 0.4523,
"step": 3605
},
{
"epoch": 0.78,
"grad_norm": 0.14365822076797485,
"learning_rate": 1.2506909078257357e-06,
"loss": 0.5097,
"step": 3606
},
{
"epoch": 0.78,
"grad_norm": 0.13116493821144104,
"learning_rate": 1.2483834464388622e-06,
"loss": 0.5036,
"step": 3607
},
{
"epoch": 0.78,
"grad_norm": 0.15349233150482178,
"learning_rate": 1.2460778119200778e-06,
"loss": 0.4983,
"step": 3608
},
{
"epoch": 0.78,
"grad_norm": 0.15856203436851501,
"learning_rate": 1.2437740053921238e-06,
"loss": 0.4921,
"step": 3609
},
{
"epoch": 0.78,
"grad_norm": 0.13519662618637085,
"learning_rate": 1.24147202797685e-06,
"loss": 0.5291,
"step": 3610
},
{
"epoch": 0.78,
"grad_norm": 0.14394241571426392,
"learning_rate": 1.2391718807952142e-06,
"loss": 0.5235,
"step": 3611
},
{
"epoch": 0.78,
"grad_norm": 0.12606112658977509,
"learning_rate": 1.236873564967284e-06,
"loss": 0.4571,
"step": 3612
},
{
"epoch": 0.78,
"grad_norm": 0.1665707379579544,
"learning_rate": 1.2345770816122388e-06,
"loss": 0.5432,
"step": 3613
},
{
"epoch": 0.78,
"grad_norm": 0.14036637544631958,
"learning_rate": 1.2322824318483568e-06,
"loss": 0.4873,
"step": 3614
},
{
"epoch": 0.78,
"grad_norm": 0.1713072657585144,
"learning_rate": 1.2299896167930358e-06,
"loss": 0.5134,
"step": 3615
},
{
"epoch": 0.78,
"grad_norm": 0.15142671763896942,
"learning_rate": 1.227698637562768e-06,
"loss": 0.5193,
"step": 3616
},
{
"epoch": 0.78,
"grad_norm": 0.148328959941864,
"learning_rate": 1.2254094952731594e-06,
"loss": 0.5107,
"step": 3617
},
{
"epoch": 0.78,
"grad_norm": 0.15323348343372345,
"learning_rate": 1.2231221910389196e-06,
"loss": 0.5187,
"step": 3618
},
{
"epoch": 0.78,
"grad_norm": 0.13149654865264893,
"learning_rate": 1.2208367259738602e-06,
"loss": 0.5422,
"step": 3619
},
{
"epoch": 0.78,
"grad_norm": 0.15822039544582367,
"learning_rate": 1.2185531011909008e-06,
"loss": 0.493,
"step": 3620
},
{
"epoch": 0.78,
"grad_norm": 0.1450645625591278,
"learning_rate": 1.2162713178020641e-06,
"loss": 0.4954,
"step": 3621
},
{
"epoch": 0.78,
"grad_norm": 0.14330001175403595,
"learning_rate": 1.2139913769184757e-06,
"loss": 0.4457,
"step": 3622
},
{
"epoch": 0.78,
"grad_norm": 0.1793079823255539,
"learning_rate": 1.211713279650365e-06,
"loss": 0.5186,
"step": 3623
},
{
"epoch": 0.78,
"grad_norm": 0.1488538533449173,
"learning_rate": 1.2094370271070599e-06,
"loss": 0.5479,
"step": 3624
},
{
"epoch": 0.78,
"grad_norm": 0.18175008893013,
"learning_rate": 1.207162620396996e-06,
"loss": 0.5202,
"step": 3625
},
{
"epoch": 0.78,
"grad_norm": 0.15374873578548431,
"learning_rate": 1.2048900606277036e-06,
"loss": 0.5404,
"step": 3626
},
{
"epoch": 0.78,
"grad_norm": 0.1646018624305725,
"learning_rate": 1.2026193489058185e-06,
"loss": 0.5023,
"step": 3627
},
{
"epoch": 0.78,
"grad_norm": 0.13878245651721954,
"learning_rate": 1.2003504863370746e-06,
"loss": 0.4892,
"step": 3628
},
{
"epoch": 0.78,
"grad_norm": 0.1426292210817337,
"learning_rate": 1.1980834740263065e-06,
"loss": 0.5052,
"step": 3629
},
{
"epoch": 0.78,
"grad_norm": 0.15884622931480408,
"learning_rate": 1.195818313077447e-06,
"loss": 0.5517,
"step": 3630
},
{
"epoch": 0.78,
"grad_norm": 0.15096734464168549,
"learning_rate": 1.1935550045935252e-06,
"loss": 0.4624,
"step": 3631
},
{
"epoch": 0.78,
"grad_norm": 0.1437617689371109,
"learning_rate": 1.1912935496766719e-06,
"loss": 0.4659,
"step": 3632
},
{
"epoch": 0.78,
"grad_norm": 0.22621825337409973,
"learning_rate": 1.1890339494281133e-06,
"loss": 0.4706,
"step": 3633
},
{
"epoch": 0.78,
"grad_norm": 0.17736200988292694,
"learning_rate": 1.186776204948173e-06,
"loss": 0.5366,
"step": 3634
},
{
"epoch": 0.78,
"grad_norm": 0.1408076137304306,
"learning_rate": 1.1845203173362725e-06,
"loss": 0.5448,
"step": 3635
},
{
"epoch": 0.78,
"grad_norm": 0.15870949625968933,
"learning_rate": 1.182266287690924e-06,
"loss": 0.4851,
"step": 3636
},
{
"epoch": 0.78,
"grad_norm": 0.12970934808254242,
"learning_rate": 1.1800141171097412e-06,
"loss": 0.5177,
"step": 3637
},
{
"epoch": 0.78,
"grad_norm": 0.15456534922122955,
"learning_rate": 1.177763806689427e-06,
"loss": 0.4997,
"step": 3638
},
{
"epoch": 0.78,
"grad_norm": 0.14675313234329224,
"learning_rate": 1.175515357525781e-06,
"loss": 0.5198,
"step": 3639
},
{
"epoch": 0.78,
"grad_norm": 0.1615784913301468,
"learning_rate": 1.173268770713701e-06,
"loss": 0.5251,
"step": 3640
},
{
"epoch": 0.78,
"grad_norm": 0.19272805750370026,
"learning_rate": 1.1710240473471685e-06,
"loss": 0.5331,
"step": 3641
},
{
"epoch": 0.78,
"grad_norm": 0.15185219049453735,
"learning_rate": 1.1687811885192662e-06,
"loss": 0.5372,
"step": 3642
},
{
"epoch": 0.78,
"grad_norm": 0.1498749703168869,
"learning_rate": 1.1665401953221622e-06,
"loss": 0.4953,
"step": 3643
},
{
"epoch": 0.78,
"grad_norm": 0.12573790550231934,
"learning_rate": 1.16430106884712e-06,
"loss": 0.556,
"step": 3644
},
{
"epoch": 0.79,
"grad_norm": 0.15969648957252502,
"learning_rate": 1.1620638101844938e-06,
"loss": 0.4978,
"step": 3645
},
{
"epoch": 0.79,
"grad_norm": 0.14067567884922028,
"learning_rate": 1.159828420423728e-06,
"loss": 0.5057,
"step": 3646
},
{
"epoch": 0.79,
"grad_norm": 0.15133407711982727,
"learning_rate": 1.157594900653357e-06,
"loss": 0.5406,
"step": 3647
},
{
"epoch": 0.79,
"grad_norm": 0.1627969741821289,
"learning_rate": 1.1553632519610025e-06,
"loss": 0.5282,
"step": 3648
},
{
"epoch": 0.79,
"grad_norm": 0.163666769862175,
"learning_rate": 1.1531334754333772e-06,
"loss": 0.5655,
"step": 3649
},
{
"epoch": 0.79,
"grad_norm": 0.1481485515832901,
"learning_rate": 1.1509055721562839e-06,
"loss": 0.5012,
"step": 3650
},
{
"epoch": 0.79,
"grad_norm": 0.1437186896800995,
"learning_rate": 1.148679543214608e-06,
"loss": 0.4814,
"step": 3651
},
{
"epoch": 0.79,
"grad_norm": 0.1677170991897583,
"learning_rate": 1.1464553896923264e-06,
"loss": 0.5308,
"step": 3652
},
{
"epoch": 0.79,
"grad_norm": 0.15452422201633453,
"learning_rate": 1.1442331126725014e-06,
"loss": 0.4929,
"step": 3653
},
{
"epoch": 0.79,
"grad_norm": 0.1445283144712448,
"learning_rate": 1.1420127132372839e-06,
"loss": 0.5104,
"step": 3654
},
{
"epoch": 0.79,
"grad_norm": 0.13753926753997803,
"learning_rate": 1.1397941924679046e-06,
"loss": 0.4942,
"step": 3655
},
{
"epoch": 0.79,
"grad_norm": 0.1484784483909607,
"learning_rate": 1.1375775514446846e-06,
"loss": 0.5266,
"step": 3656
},
{
"epoch": 0.79,
"grad_norm": 0.21680472791194916,
"learning_rate": 1.1353627912470289e-06,
"loss": 0.5809,
"step": 3657
},
{
"epoch": 0.79,
"grad_norm": 0.14361554384231567,
"learning_rate": 1.1331499129534252e-06,
"loss": 0.5438,
"step": 3658
},
{
"epoch": 0.79,
"grad_norm": 0.14105179905891418,
"learning_rate": 1.1309389176414471e-06,
"loss": 0.5111,
"step": 3659
},
{
"epoch": 0.79,
"grad_norm": 0.1748628318309784,
"learning_rate": 1.128729806387746e-06,
"loss": 0.537,
"step": 3660
},
{
"epoch": 0.79,
"grad_norm": 0.162385493516922,
"learning_rate": 1.1265225802680623e-06,
"loss": 0.5325,
"step": 3661
},
{
"epoch": 0.79,
"grad_norm": 0.20901146531105042,
"learning_rate": 1.124317240357216e-06,
"loss": 0.5093,
"step": 3662
},
{
"epoch": 0.79,
"grad_norm": 0.14778484404087067,
"learning_rate": 1.122113787729106e-06,
"loss": 0.5484,
"step": 3663
},
{
"epoch": 0.79,
"grad_norm": 0.1553722620010376,
"learning_rate": 1.119912223456715e-06,
"loss": 0.5044,
"step": 3664
},
{
"epoch": 0.79,
"grad_norm": 0.15933193266391754,
"learning_rate": 1.117712548612106e-06,
"loss": 0.5267,
"step": 3665
},
{
"epoch": 0.79,
"grad_norm": 0.14769431948661804,
"learning_rate": 1.1155147642664217e-06,
"loss": 0.5027,
"step": 3666
},
{
"epoch": 0.79,
"grad_norm": 0.15224431455135345,
"learning_rate": 1.1133188714898846e-06,
"loss": 0.5068,
"step": 3667
},
{
"epoch": 0.79,
"grad_norm": 0.25812146067619324,
"learning_rate": 1.1111248713517935e-06,
"loss": 0.516,
"step": 3668
},
{
"epoch": 0.79,
"grad_norm": 0.1287028193473816,
"learning_rate": 1.1089327649205301e-06,
"loss": 0.497,
"step": 3669
},
{
"epoch": 0.79,
"grad_norm": 0.17315033078193665,
"learning_rate": 1.1067425532635463e-06,
"loss": 0.5764,
"step": 3670
},
{
"epoch": 0.79,
"grad_norm": 0.17579080164432526,
"learning_rate": 1.1045542374473821e-06,
"loss": 0.508,
"step": 3671
},
{
"epoch": 0.79,
"grad_norm": 0.1450372189283371,
"learning_rate": 1.1023678185376474e-06,
"loss": 0.5104,
"step": 3672
},
{
"epoch": 0.79,
"grad_norm": 0.2039722353219986,
"learning_rate": 1.1001832975990274e-06,
"loss": 0.5159,
"step": 3673
},
{
"epoch": 0.79,
"grad_norm": 0.15762609243392944,
"learning_rate": 1.0980006756952882e-06,
"loss": 0.5387,
"step": 3674
},
{
"epoch": 0.79,
"grad_norm": 0.14661352336406708,
"learning_rate": 1.095819953889265e-06,
"loss": 0.4672,
"step": 3675
},
{
"epoch": 0.79,
"grad_norm": 0.18748416006565094,
"learning_rate": 1.0936411332428732e-06,
"loss": 0.4949,
"step": 3676
},
{
"epoch": 0.79,
"grad_norm": 0.18095709383487701,
"learning_rate": 1.091464214817099e-06,
"loss": 0.5316,
"step": 3677
},
{
"epoch": 0.79,
"grad_norm": 0.13158752024173737,
"learning_rate": 1.089289199672004e-06,
"loss": 0.4978,
"step": 3678
},
{
"epoch": 0.79,
"grad_norm": 0.15241560339927673,
"learning_rate": 1.0871160888667242e-06,
"loss": 0.5195,
"step": 3679
},
{
"epoch": 0.79,
"grad_norm": 0.13921800255775452,
"learning_rate": 1.084944883459464e-06,
"loss": 0.5269,
"step": 3680
},
{
"epoch": 0.79,
"grad_norm": 0.16038571298122406,
"learning_rate": 1.0827755845075044e-06,
"loss": 0.5714,
"step": 3681
},
{
"epoch": 0.79,
"grad_norm": 0.1517871618270874,
"learning_rate": 1.0806081930671947e-06,
"loss": 0.4976,
"step": 3682
},
{
"epoch": 0.79,
"grad_norm": 0.1371169090270996,
"learning_rate": 1.0784427101939553e-06,
"loss": 0.5421,
"step": 3683
},
{
"epoch": 0.79,
"grad_norm": 0.13309676945209503,
"learning_rate": 1.0762791369422838e-06,
"loss": 0.4903,
"step": 3684
},
{
"epoch": 0.79,
"grad_norm": 0.15115486085414886,
"learning_rate": 1.0741174743657385e-06,
"loss": 0.5011,
"step": 3685
},
{
"epoch": 0.79,
"grad_norm": 0.1870022863149643,
"learning_rate": 1.0719577235169537e-06,
"loss": 0.5292,
"step": 3686
},
{
"epoch": 0.79,
"grad_norm": 0.37398022413253784,
"learning_rate": 1.0697998854476294e-06,
"loss": 0.5336,
"step": 3687
},
{
"epoch": 0.79,
"grad_norm": 0.1611040085554123,
"learning_rate": 1.0676439612085353e-06,
"loss": 0.5077,
"step": 3688
},
{
"epoch": 0.79,
"grad_norm": 0.14922092854976654,
"learning_rate": 1.0654899518495104e-06,
"loss": 0.5461,
"step": 3689
},
{
"epoch": 0.79,
"grad_norm": 0.1831667125225067,
"learning_rate": 1.0633378584194593e-06,
"loss": 0.4868,
"step": 3690
},
{
"epoch": 0.8,
"grad_norm": 0.15320684015750885,
"learning_rate": 1.0611876819663557e-06,
"loss": 0.5232,
"step": 3691
},
{
"epoch": 0.8,
"grad_norm": 0.14661262929439545,
"learning_rate": 1.059039423537237e-06,
"loss": 0.485,
"step": 3692
},
{
"epoch": 0.8,
"grad_norm": 0.13266430795192719,
"learning_rate": 1.0568930841782088e-06,
"loss": 0.5187,
"step": 3693
},
{
"epoch": 0.8,
"grad_norm": 0.22684414684772491,
"learning_rate": 1.054748664934443e-06,
"loss": 0.5477,
"step": 3694
},
{
"epoch": 0.8,
"grad_norm": 0.1602558046579361,
"learning_rate": 1.0526061668501708e-06,
"loss": 0.4672,
"step": 3695
},
{
"epoch": 0.8,
"grad_norm": 0.1579863578081131,
"learning_rate": 1.0504655909686978e-06,
"loss": 0.5119,
"step": 3696
},
{
"epoch": 0.8,
"grad_norm": 0.1422806680202484,
"learning_rate": 1.048326938332384e-06,
"loss": 0.549,
"step": 3697
},
{
"epoch": 0.8,
"grad_norm": 0.13555404543876648,
"learning_rate": 1.0461902099826577e-06,
"loss": 0.5839,
"step": 3698
},
{
"epoch": 0.8,
"grad_norm": 0.16685040295124054,
"learning_rate": 1.0440554069600112e-06,
"loss": 0.523,
"step": 3699
},
{
"epoch": 0.8,
"grad_norm": 0.15756739675998688,
"learning_rate": 1.0419225303039943e-06,
"loss": 0.4513,
"step": 3700
},
{
"epoch": 0.8,
"grad_norm": 0.15640777349472046,
"learning_rate": 1.0397915810532227e-06,
"loss": 0.525,
"step": 3701
},
{
"epoch": 0.8,
"grad_norm": 0.15122468769550323,
"learning_rate": 1.0376625602453733e-06,
"loss": 0.5116,
"step": 3702
},
{
"epoch": 0.8,
"grad_norm": 0.15925420820713043,
"learning_rate": 1.0355354689171831e-06,
"loss": 0.5259,
"step": 3703
},
{
"epoch": 0.8,
"grad_norm": 0.14913156628608704,
"learning_rate": 1.0334103081044504e-06,
"loss": 0.5148,
"step": 3704
},
{
"epoch": 0.8,
"grad_norm": 0.15156058967113495,
"learning_rate": 1.031287078842031e-06,
"loss": 0.5239,
"step": 3705
},
{
"epoch": 0.8,
"grad_norm": 0.1923210173845291,
"learning_rate": 1.0291657821638435e-06,
"loss": 0.5351,
"step": 3706
},
{
"epoch": 0.8,
"grad_norm": 0.16889818012714386,
"learning_rate": 1.0270464191028618e-06,
"loss": 0.5231,
"step": 3707
},
{
"epoch": 0.8,
"grad_norm": 0.13356614112854004,
"learning_rate": 1.024928990691121e-06,
"loss": 0.506,
"step": 3708
},
{
"epoch": 0.8,
"grad_norm": 0.1991608887910843,
"learning_rate": 1.0228134979597126e-06,
"loss": 0.5501,
"step": 3709
},
{
"epoch": 0.8,
"grad_norm": 0.15821781754493713,
"learning_rate": 1.0206999419387881e-06,
"loss": 0.5371,
"step": 3710
},
{
"epoch": 0.8,
"grad_norm": 0.1407308578491211,
"learning_rate": 1.0185883236575533e-06,
"loss": 0.5072,
"step": 3711
},
{
"epoch": 0.8,
"grad_norm": 0.14752401411533356,
"learning_rate": 1.0164786441442698e-06,
"loss": 0.5163,
"step": 3712
},
{
"epoch": 0.8,
"grad_norm": 0.14390012621879578,
"learning_rate": 1.0143709044262574e-06,
"loss": 0.4969,
"step": 3713
},
{
"epoch": 0.8,
"grad_norm": 0.1694592982530594,
"learning_rate": 1.0122651055298898e-06,
"loss": 0.4924,
"step": 3714
},
{
"epoch": 0.8,
"grad_norm": 0.15931564569473267,
"learning_rate": 1.0101612484805967e-06,
"loss": 0.4842,
"step": 3715
},
{
"epoch": 0.8,
"grad_norm": 0.16370849311351776,
"learning_rate": 1.0080593343028621e-06,
"loss": 0.497,
"step": 3716
},
{
"epoch": 0.8,
"grad_norm": 0.16331344842910767,
"learning_rate": 1.005959364020222e-06,
"loss": 0.4919,
"step": 3717
},
{
"epoch": 0.8,
"grad_norm": 0.1296970099210739,
"learning_rate": 1.0038613386552687e-06,
"loss": 0.5674,
"step": 3718
},
{
"epoch": 0.8,
"grad_norm": 0.15003569424152374,
"learning_rate": 1.001765259229644e-06,
"loss": 0.5164,
"step": 3719
},
{
"epoch": 0.8,
"grad_norm": 0.14973247051239014,
"learning_rate": 9.996711267640451e-07,
"loss": 0.4997,
"step": 3720
},
{
"epoch": 0.8,
"grad_norm": 0.14696918427944183,
"learning_rate": 9.975789422782205e-07,
"loss": 0.4806,
"step": 3721
},
{
"epoch": 0.8,
"grad_norm": 0.14564906060695648,
"learning_rate": 9.95488706790969e-07,
"loss": 0.5491,
"step": 3722
},
{
"epoch": 0.8,
"grad_norm": 0.18390415608882904,
"learning_rate": 9.934004213201431e-07,
"loss": 0.5264,
"step": 3723
},
{
"epoch": 0.8,
"grad_norm": 0.1590055525302887,
"learning_rate": 9.913140868826405e-07,
"loss": 0.497,
"step": 3724
},
{
"epoch": 0.8,
"grad_norm": 0.1445043832063675,
"learning_rate": 9.892297044944133e-07,
"loss": 0.5089,
"step": 3725
},
{
"epoch": 0.8,
"grad_norm": 0.15211768448352814,
"learning_rate": 9.871472751704625e-07,
"loss": 0.5093,
"step": 3726
},
{
"epoch": 0.8,
"grad_norm": 0.1753348559141159,
"learning_rate": 9.85066799924836e-07,
"loss": 0.5038,
"step": 3727
},
{
"epoch": 0.8,
"grad_norm": 0.14900852739810944,
"learning_rate": 9.829882797706336e-07,
"loss": 0.4721,
"step": 3728
},
{
"epoch": 0.8,
"grad_norm": 0.1514863520860672,
"learning_rate": 9.809117157199982e-07,
"loss": 0.5869,
"step": 3729
},
{
"epoch": 0.8,
"grad_norm": 0.15811176598072052,
"learning_rate": 9.788371087841236e-07,
"loss": 0.5396,
"step": 3730
},
{
"epoch": 0.8,
"grad_norm": 0.1752791553735733,
"learning_rate": 9.767644599732517e-07,
"loss": 0.4918,
"step": 3731
},
{
"epoch": 0.8,
"grad_norm": 0.17730747163295746,
"learning_rate": 9.74693770296667e-07,
"loss": 0.5003,
"step": 3732
},
{
"epoch": 0.8,
"grad_norm": 0.1746446192264557,
"learning_rate": 9.72625040762702e-07,
"loss": 0.4698,
"step": 3733
},
{
"epoch": 0.8,
"grad_norm": 0.14243842661380768,
"learning_rate": 9.705582723787348e-07,
"loss": 0.5296,
"step": 3734
},
{
"epoch": 0.8,
"grad_norm": 0.17734676599502563,
"learning_rate": 9.684934661511909e-07,
"loss": 0.5386,
"step": 3735
},
{
"epoch": 0.8,
"grad_norm": 0.12994273006916046,
"learning_rate": 9.664306230855342e-07,
"loss": 0.5133,
"step": 3736
},
{
"epoch": 0.8,
"grad_norm": 0.14365623891353607,
"learning_rate": 9.643697441862782e-07,
"loss": 0.4759,
"step": 3737
},
{
"epoch": 0.81,
"grad_norm": 0.12920841574668884,
"learning_rate": 9.623108304569783e-07,
"loss": 0.4998,
"step": 3738
},
{
"epoch": 0.81,
"grad_norm": 0.14855559170246124,
"learning_rate": 9.6025388290023e-07,
"loss": 0.5141,
"step": 3739
},
{
"epoch": 0.81,
"grad_norm": 0.16959311068058014,
"learning_rate": 9.58198902517678e-07,
"loss": 0.5376,
"step": 3740
},
{
"epoch": 0.81,
"grad_norm": 0.14248433709144592,
"learning_rate": 9.561458903100025e-07,
"loss": 0.5684,
"step": 3741
},
{
"epoch": 0.81,
"grad_norm": 0.1944878101348877,
"learning_rate": 9.540948472769278e-07,
"loss": 0.4685,
"step": 3742
},
{
"epoch": 0.81,
"grad_norm": 0.16937778890132904,
"learning_rate": 9.520457744172218e-07,
"loss": 0.5127,
"step": 3743
},
{
"epoch": 0.81,
"grad_norm": 0.15535053610801697,
"learning_rate": 9.499986727286869e-07,
"loss": 0.509,
"step": 3744
},
{
"epoch": 0.81,
"grad_norm": 0.13496124744415283,
"learning_rate": 9.479535432081716e-07,
"loss": 0.4883,
"step": 3745
},
{
"epoch": 0.81,
"grad_norm": 0.15980157256126404,
"learning_rate": 9.459103868515618e-07,
"loss": 0.5115,
"step": 3746
},
{
"epoch": 0.81,
"grad_norm": 0.13277289271354675,
"learning_rate": 9.438692046537812e-07,
"loss": 0.5383,
"step": 3747
},
{
"epoch": 0.81,
"grad_norm": 0.15120829641819,
"learning_rate": 9.418299976087964e-07,
"loss": 0.4822,
"step": 3748
},
{
"epoch": 0.81,
"grad_norm": 0.18064884841442108,
"learning_rate": 9.397927667096058e-07,
"loss": 0.4813,
"step": 3749
},
{
"epoch": 0.81,
"grad_norm": 0.15214307606220245,
"learning_rate": 9.377575129482513e-07,
"loss": 0.538,
"step": 3750
},
{
"epoch": 0.81,
"grad_norm": 0.13899169862270355,
"learning_rate": 9.357242373158076e-07,
"loss": 0.5259,
"step": 3751
},
{
"epoch": 0.81,
"grad_norm": 0.17859694361686707,
"learning_rate": 9.336929408023887e-07,
"loss": 0.5298,
"step": 3752
},
{
"epoch": 0.81,
"grad_norm": 0.16139504313468933,
"learning_rate": 9.316636243971472e-07,
"loss": 0.47,
"step": 3753
},
{
"epoch": 0.81,
"grad_norm": 0.16516685485839844,
"learning_rate": 9.29636289088266e-07,
"loss": 0.4834,
"step": 3754
},
{
"epoch": 0.81,
"grad_norm": 0.1459476500749588,
"learning_rate": 9.27610935862967e-07,
"loss": 0.505,
"step": 3755
},
{
"epoch": 0.81,
"grad_norm": 0.14627854526042938,
"learning_rate": 9.255875657075053e-07,
"loss": 0.5443,
"step": 3756
},
{
"epoch": 0.81,
"grad_norm": 0.17981037497520447,
"learning_rate": 9.235661796071704e-07,
"loss": 0.5165,
"step": 3757
},
{
"epoch": 0.81,
"grad_norm": 0.14158649742603302,
"learning_rate": 9.215467785462873e-07,
"loss": 0.5373,
"step": 3758
},
{
"epoch": 0.81,
"grad_norm": 0.1404084414243698,
"learning_rate": 9.195293635082125e-07,
"loss": 0.5071,
"step": 3759
},
{
"epoch": 0.81,
"grad_norm": 0.18543866276741028,
"learning_rate": 9.175139354753382e-07,
"loss": 0.4776,
"step": 3760
},
{
"epoch": 0.81,
"grad_norm": 0.1633271872997284,
"learning_rate": 9.155004954290842e-07,
"loss": 0.5757,
"step": 3761
},
{
"epoch": 0.81,
"grad_norm": 0.13727520406246185,
"learning_rate": 9.134890443499068e-07,
"loss": 0.489,
"step": 3762
},
{
"epoch": 0.81,
"grad_norm": 0.14105379581451416,
"learning_rate": 9.114795832172907e-07,
"loss": 0.4545,
"step": 3763
},
{
"epoch": 0.81,
"grad_norm": 0.15787868201732635,
"learning_rate": 9.094721130097517e-07,
"loss": 0.5232,
"step": 3764
},
{
"epoch": 0.81,
"grad_norm": 0.12892010807991028,
"learning_rate": 9.074666347048416e-07,
"loss": 0.5527,
"step": 3765
},
{
"epoch": 0.81,
"grad_norm": 0.1309516578912735,
"learning_rate": 9.054631492791344e-07,
"loss": 0.5209,
"step": 3766
},
{
"epoch": 0.81,
"grad_norm": 0.14891640841960907,
"learning_rate": 9.034616577082389e-07,
"loss": 0.4782,
"step": 3767
},
{
"epoch": 0.81,
"grad_norm": 0.17316175997257233,
"learning_rate": 9.014621609667896e-07,
"loss": 0.5075,
"step": 3768
},
{
"epoch": 0.81,
"grad_norm": 0.17712554335594177,
"learning_rate": 8.994646600284518e-07,
"loss": 0.5551,
"step": 3769
},
{
"epoch": 0.81,
"grad_norm": 0.17951372265815735,
"learning_rate": 8.974691558659187e-07,
"loss": 0.4612,
"step": 3770
},
{
"epoch": 0.81,
"grad_norm": 0.18492546677589417,
"learning_rate": 8.954756494509104e-07,
"loss": 0.498,
"step": 3771
},
{
"epoch": 0.81,
"grad_norm": 0.15967923402786255,
"learning_rate": 8.934841417541767e-07,
"loss": 0.5152,
"step": 3772
},
{
"epoch": 0.81,
"grad_norm": 0.1444973647594452,
"learning_rate": 8.914946337454894e-07,
"loss": 0.4852,
"step": 3773
},
{
"epoch": 0.81,
"grad_norm": 0.13344036042690277,
"learning_rate": 8.8950712639365e-07,
"loss": 0.5396,
"step": 3774
},
{
"epoch": 0.81,
"grad_norm": 0.14624960720539093,
"learning_rate": 8.87521620666486e-07,
"loss": 0.4983,
"step": 3775
},
{
"epoch": 0.81,
"grad_norm": 0.15818633139133453,
"learning_rate": 8.855381175308475e-07,
"loss": 0.4791,
"step": 3776
},
{
"epoch": 0.81,
"grad_norm": 0.17238670587539673,
"learning_rate": 8.835566179526118e-07,
"loss": 0.475,
"step": 3777
},
{
"epoch": 0.81,
"grad_norm": 0.16176079213619232,
"learning_rate": 8.815771228966796e-07,
"loss": 0.5353,
"step": 3778
},
{
"epoch": 0.81,
"grad_norm": 0.17096221446990967,
"learning_rate": 8.795996333269763e-07,
"loss": 0.483,
"step": 3779
},
{
"epoch": 0.81,
"grad_norm": 0.1631225347518921,
"learning_rate": 8.776241502064508e-07,
"loss": 0.5166,
"step": 3780
},
{
"epoch": 0.81,
"grad_norm": 0.19986139237880707,
"learning_rate": 8.756506744970722e-07,
"loss": 0.529,
"step": 3781
},
{
"epoch": 0.81,
"grad_norm": 0.1557171493768692,
"learning_rate": 8.736792071598355e-07,
"loss": 0.5267,
"step": 3782
},
{
"epoch": 0.81,
"grad_norm": 0.15632902085781097,
"learning_rate": 8.717097491547566e-07,
"loss": 0.5189,
"step": 3783
},
{
"epoch": 0.82,
"grad_norm": 0.16741085052490234,
"learning_rate": 8.697423014408718e-07,
"loss": 0.5474,
"step": 3784
},
{
"epoch": 0.82,
"grad_norm": 0.15666568279266357,
"learning_rate": 8.677768649762419e-07,
"loss": 0.5306,
"step": 3785
},
{
"epoch": 0.82,
"grad_norm": 0.20284314453601837,
"learning_rate": 8.658134407179419e-07,
"loss": 0.5003,
"step": 3786
},
{
"epoch": 0.82,
"grad_norm": 0.1518256664276123,
"learning_rate": 8.638520296220748e-07,
"loss": 0.5322,
"step": 3787
},
{
"epoch": 0.82,
"grad_norm": 0.18800678849220276,
"learning_rate": 8.61892632643756e-07,
"loss": 0.4763,
"step": 3788
},
{
"epoch": 0.82,
"grad_norm": 0.1399422585964203,
"learning_rate": 8.59935250737125e-07,
"loss": 0.5293,
"step": 3789
},
{
"epoch": 0.82,
"grad_norm": 0.14804938435554504,
"learning_rate": 8.579798848553389e-07,
"loss": 0.4703,
"step": 3790
},
{
"epoch": 0.82,
"grad_norm": 0.15333673357963562,
"learning_rate": 8.560265359505716e-07,
"loss": 0.4947,
"step": 3791
},
{
"epoch": 0.82,
"grad_norm": 0.1545214056968689,
"learning_rate": 8.540752049740181e-07,
"loss": 0.5079,
"step": 3792
},
{
"epoch": 0.82,
"grad_norm": 0.137412428855896,
"learning_rate": 8.521258928758864e-07,
"loss": 0.4973,
"step": 3793
},
{
"epoch": 0.82,
"grad_norm": 0.1467263251543045,
"learning_rate": 8.501786006054047e-07,
"loss": 0.5318,
"step": 3794
},
{
"epoch": 0.82,
"grad_norm": 0.16581839323043823,
"learning_rate": 8.482333291108141e-07,
"loss": 0.5226,
"step": 3795
},
{
"epoch": 0.82,
"grad_norm": 0.1452476680278778,
"learning_rate": 8.462900793393775e-07,
"loss": 0.5012,
"step": 3796
},
{
"epoch": 0.82,
"grad_norm": 0.1618988811969757,
"learning_rate": 8.443488522373694e-07,
"loss": 0.501,
"step": 3797
},
{
"epoch": 0.82,
"grad_norm": 0.1634100079536438,
"learning_rate": 8.424096487500777e-07,
"loss": 0.5288,
"step": 3798
},
{
"epoch": 0.82,
"grad_norm": 0.17481021583080292,
"learning_rate": 8.404724698218103e-07,
"loss": 0.5575,
"step": 3799
},
{
"epoch": 0.82,
"grad_norm": 0.13058480620384216,
"learning_rate": 8.385373163958821e-07,
"loss": 0.4976,
"step": 3800
},
{
"epoch": 0.82,
"grad_norm": 0.1389196217060089,
"learning_rate": 8.366041894146276e-07,
"loss": 0.4854,
"step": 3801
},
{
"epoch": 0.82,
"grad_norm": 0.15564516186714172,
"learning_rate": 8.346730898193928e-07,
"loss": 0.4984,
"step": 3802
},
{
"epoch": 0.82,
"grad_norm": 0.1349528729915619,
"learning_rate": 8.327440185505353e-07,
"loss": 0.5138,
"step": 3803
},
{
"epoch": 0.82,
"grad_norm": 0.1407652646303177,
"learning_rate": 8.308169765474278e-07,
"loss": 0.4912,
"step": 3804
},
{
"epoch": 0.82,
"grad_norm": 0.14387796819210052,
"learning_rate": 8.2889196474845e-07,
"loss": 0.5048,
"step": 3805
},
{
"epoch": 0.82,
"grad_norm": 0.15386423468589783,
"learning_rate": 8.269689840909967e-07,
"loss": 0.5339,
"step": 3806
},
{
"epoch": 0.82,
"grad_norm": 0.16335895657539368,
"learning_rate": 8.250480355114748e-07,
"loss": 0.5343,
"step": 3807
},
{
"epoch": 0.82,
"grad_norm": 0.16175401210784912,
"learning_rate": 8.231291199452956e-07,
"loss": 0.52,
"step": 3808
},
{
"epoch": 0.82,
"grad_norm": 0.15114691853523254,
"learning_rate": 8.212122383268889e-07,
"loss": 0.5034,
"step": 3809
},
{
"epoch": 0.82,
"grad_norm": 0.13014435768127441,
"learning_rate": 8.192973915896868e-07,
"loss": 0.5266,
"step": 3810
},
{
"epoch": 0.82,
"grad_norm": 0.1377837210893631,
"learning_rate": 8.17384580666134e-07,
"loss": 0.5326,
"step": 3811
},
{
"epoch": 0.82,
"grad_norm": 0.17275045812129974,
"learning_rate": 8.154738064876843e-07,
"loss": 0.5156,
"step": 3812
},
{
"epoch": 0.82,
"grad_norm": 0.1639435589313507,
"learning_rate": 8.135650699847963e-07,
"loss": 0.504,
"step": 3813
},
{
"epoch": 0.82,
"grad_norm": 0.18835903704166412,
"learning_rate": 8.116583720869398e-07,
"loss": 0.5377,
"step": 3814
},
{
"epoch": 0.82,
"grad_norm": 0.1467577964067459,
"learning_rate": 8.097537137225909e-07,
"loss": 0.5437,
"step": 3815
},
{
"epoch": 0.82,
"grad_norm": 0.1413908451795578,
"learning_rate": 8.078510958192337e-07,
"loss": 0.5246,
"step": 3816
},
{
"epoch": 0.82,
"grad_norm": 0.21813301742076874,
"learning_rate": 8.05950519303354e-07,
"loss": 0.48,
"step": 3817
},
{
"epoch": 0.82,
"grad_norm": 0.14480313658714294,
"learning_rate": 8.040519851004492e-07,
"loss": 0.5298,
"step": 3818
},
{
"epoch": 0.82,
"grad_norm": 0.16793721914291382,
"learning_rate": 8.021554941350202e-07,
"loss": 0.4885,
"step": 3819
},
{
"epoch": 0.82,
"grad_norm": 0.15354284644126892,
"learning_rate": 8.002610473305688e-07,
"loss": 0.4743,
"step": 3820
},
{
"epoch": 0.82,
"grad_norm": 0.15883216261863708,
"learning_rate": 7.983686456096112e-07,
"loss": 0.5344,
"step": 3821
},
{
"epoch": 0.82,
"grad_norm": 0.16302940249443054,
"learning_rate": 7.964782898936569e-07,
"loss": 0.5251,
"step": 3822
},
{
"epoch": 0.82,
"grad_norm": 0.1534924954175949,
"learning_rate": 7.945899811032254e-07,
"loss": 0.5438,
"step": 3823
},
{
"epoch": 0.82,
"grad_norm": 0.1581207513809204,
"learning_rate": 7.927037201578397e-07,
"loss": 0.4707,
"step": 3824
},
{
"epoch": 0.82,
"grad_norm": 0.16421711444854736,
"learning_rate": 7.908195079760205e-07,
"loss": 0.485,
"step": 3825
},
{
"epoch": 0.82,
"grad_norm": 0.16686981916427612,
"learning_rate": 7.889373454752964e-07,
"loss": 0.5225,
"step": 3826
},
{
"epoch": 0.82,
"grad_norm": 0.1350572556257248,
"learning_rate": 7.870572335721949e-07,
"loss": 0.5018,
"step": 3827
},
{
"epoch": 0.82,
"grad_norm": 0.1447533518075943,
"learning_rate": 7.851791731822461e-07,
"loss": 0.5149,
"step": 3828
},
{
"epoch": 0.82,
"grad_norm": 0.13988631963729858,
"learning_rate": 7.833031652199819e-07,
"loss": 0.5441,
"step": 3829
},
{
"epoch": 0.83,
"grad_norm": 0.17618080973625183,
"learning_rate": 7.814292105989308e-07,
"loss": 0.5189,
"step": 3830
},
{
"epoch": 0.83,
"grad_norm": 0.1565089076757431,
"learning_rate": 7.795573102316267e-07,
"loss": 0.5091,
"step": 3831
},
{
"epoch": 0.83,
"grad_norm": 0.15142589807510376,
"learning_rate": 7.776874650295984e-07,
"loss": 0.4814,
"step": 3832
},
{
"epoch": 0.83,
"grad_norm": 0.1675831824541092,
"learning_rate": 7.758196759033765e-07,
"loss": 0.4961,
"step": 3833
},
{
"epoch": 0.83,
"grad_norm": 0.15488111972808838,
"learning_rate": 7.739539437624933e-07,
"loss": 0.552,
"step": 3834
},
{
"epoch": 0.83,
"grad_norm": 0.133047953248024,
"learning_rate": 7.720902695154725e-07,
"loss": 0.506,
"step": 3835
},
{
"epoch": 0.83,
"grad_norm": 0.13741527497768402,
"learning_rate": 7.702286540698417e-07,
"loss": 0.4968,
"step": 3836
},
{
"epoch": 0.83,
"grad_norm": 0.13112328946590424,
"learning_rate": 7.683690983321224e-07,
"loss": 0.4906,
"step": 3837
},
{
"epoch": 0.83,
"grad_norm": 0.17950129508972168,
"learning_rate": 7.665116032078346e-07,
"loss": 0.5324,
"step": 3838
},
{
"epoch": 0.83,
"grad_norm": 0.21670496463775635,
"learning_rate": 7.646561696014948e-07,
"loss": 0.5378,
"step": 3839
},
{
"epoch": 0.83,
"grad_norm": 0.14193449914455414,
"learning_rate": 7.628027984166153e-07,
"loss": 0.5395,
"step": 3840
},
{
"epoch": 0.83,
"grad_norm": 0.14640313386917114,
"learning_rate": 7.609514905557058e-07,
"loss": 0.4765,
"step": 3841
},
{
"epoch": 0.83,
"grad_norm": 0.15662898123264313,
"learning_rate": 7.591022469202675e-07,
"loss": 0.5274,
"step": 3842
},
{
"epoch": 0.83,
"grad_norm": 0.15614978969097137,
"learning_rate": 7.57255068410801e-07,
"loss": 0.4858,
"step": 3843
},
{
"epoch": 0.83,
"grad_norm": 0.13435639441013336,
"learning_rate": 7.554099559267964e-07,
"loss": 0.4774,
"step": 3844
},
{
"epoch": 0.83,
"grad_norm": 0.1398366242647171,
"learning_rate": 7.535669103667409e-07,
"loss": 0.5893,
"step": 3845
},
{
"epoch": 0.83,
"grad_norm": 0.14986996352672577,
"learning_rate": 7.517259326281157e-07,
"loss": 0.5105,
"step": 3846
},
{
"epoch": 0.83,
"grad_norm": 0.15778091549873352,
"learning_rate": 7.49887023607393e-07,
"loss": 0.4488,
"step": 3847
},
{
"epoch": 0.83,
"grad_norm": 0.16323697566986084,
"learning_rate": 7.480501842000404e-07,
"loss": 0.5533,
"step": 3848
},
{
"epoch": 0.83,
"grad_norm": 0.14002352952957153,
"learning_rate": 7.462154153005136e-07,
"loss": 0.5196,
"step": 3849
},
{
"epoch": 0.83,
"grad_norm": 0.1188010647892952,
"learning_rate": 7.443827178022628e-07,
"loss": 0.4912,
"step": 3850
},
{
"epoch": 0.83,
"grad_norm": 0.14760838449001312,
"learning_rate": 7.425520925977292e-07,
"loss": 0.5157,
"step": 3851
},
{
"epoch": 0.83,
"grad_norm": 0.19391202926635742,
"learning_rate": 7.407235405783453e-07,
"loss": 0.4939,
"step": 3852
},
{
"epoch": 0.83,
"grad_norm": 0.1490384191274643,
"learning_rate": 7.388970626345343e-07,
"loss": 0.494,
"step": 3853
},
{
"epoch": 0.83,
"grad_norm": 0.16639220714569092,
"learning_rate": 7.370726596557059e-07,
"loss": 0.488,
"step": 3854
},
{
"epoch": 0.83,
"grad_norm": 0.16223375499248505,
"learning_rate": 7.352503325302635e-07,
"loss": 0.4825,
"step": 3855
},
{
"epoch": 0.83,
"grad_norm": 0.16969801485538483,
"learning_rate": 7.334300821455998e-07,
"loss": 0.5288,
"step": 3856
},
{
"epoch": 0.83,
"grad_norm": 0.1843784898519516,
"learning_rate": 7.316119093880919e-07,
"loss": 0.4818,
"step": 3857
},
{
"epoch": 0.83,
"grad_norm": 0.139174684882164,
"learning_rate": 7.297958151431094e-07,
"loss": 0.5019,
"step": 3858
},
{
"epoch": 0.83,
"grad_norm": 0.18277384340763092,
"learning_rate": 7.279818002950079e-07,
"loss": 0.5432,
"step": 3859
},
{
"epoch": 0.83,
"grad_norm": 0.1524992436170578,
"learning_rate": 7.26169865727131e-07,
"loss": 0.5223,
"step": 3860
},
{
"epoch": 0.83,
"grad_norm": 0.16654187440872192,
"learning_rate": 7.243600123218109e-07,
"loss": 0.4757,
"step": 3861
},
{
"epoch": 0.83,
"grad_norm": 0.24710118770599365,
"learning_rate": 7.225522409603608e-07,
"loss": 0.5699,
"step": 3862
},
{
"epoch": 0.83,
"grad_norm": 0.15701556205749512,
"learning_rate": 7.207465525230878e-07,
"loss": 0.5001,
"step": 3863
},
{
"epoch": 0.83,
"grad_norm": 0.17674629390239716,
"learning_rate": 7.189429478892762e-07,
"loss": 0.4661,
"step": 3864
},
{
"epoch": 0.83,
"grad_norm": 0.15791045129299164,
"learning_rate": 7.171414279372041e-07,
"loss": 0.4895,
"step": 3865
},
{
"epoch": 0.83,
"grad_norm": 0.10984218865633011,
"learning_rate": 7.153419935441303e-07,
"loss": 0.4908,
"step": 3866
},
{
"epoch": 0.83,
"grad_norm": 0.1386931836605072,
"learning_rate": 7.135446455862954e-07,
"loss": 0.452,
"step": 3867
},
{
"epoch": 0.83,
"grad_norm": 0.17676003277301788,
"learning_rate": 7.117493849389306e-07,
"loss": 0.5278,
"step": 3868
},
{
"epoch": 0.83,
"grad_norm": 0.1930963546037674,
"learning_rate": 7.099562124762426e-07,
"loss": 0.4919,
"step": 3869
},
{
"epoch": 0.83,
"grad_norm": 0.13967633247375488,
"learning_rate": 7.081651290714287e-07,
"loss": 0.5333,
"step": 3870
},
{
"epoch": 0.83,
"grad_norm": 0.18139459192752838,
"learning_rate": 7.063761355966642e-07,
"loss": 0.4855,
"step": 3871
},
{
"epoch": 0.83,
"grad_norm": 0.13663552701473236,
"learning_rate": 7.045892329231086e-07,
"loss": 0.5479,
"step": 3872
},
{
"epoch": 0.83,
"grad_norm": 0.1746217906475067,
"learning_rate": 7.028044219209046e-07,
"loss": 0.4923,
"step": 3873
},
{
"epoch": 0.83,
"grad_norm": 0.14870743453502655,
"learning_rate": 7.010217034591721e-07,
"loss": 0.5018,
"step": 3874
},
{
"epoch": 0.83,
"grad_norm": 0.1460588276386261,
"learning_rate": 6.992410784060166e-07,
"loss": 0.46,
"step": 3875
},
{
"epoch": 0.83,
"grad_norm": 0.1792103797197342,
"learning_rate": 6.974625476285191e-07,
"loss": 0.524,
"step": 3876
},
{
"epoch": 0.84,
"grad_norm": 0.18173110485076904,
"learning_rate": 6.956861119927472e-07,
"loss": 0.4626,
"step": 3877
},
{
"epoch": 0.84,
"grad_norm": 0.1377502679824829,
"learning_rate": 6.93911772363745e-07,
"loss": 0.5192,
"step": 3878
},
{
"epoch": 0.84,
"grad_norm": 0.17006491124629974,
"learning_rate": 6.921395296055333e-07,
"loss": 0.5051,
"step": 3879
},
{
"epoch": 0.84,
"grad_norm": 0.13877364993095398,
"learning_rate": 6.903693845811176e-07,
"loss": 0.5102,
"step": 3880
},
{
"epoch": 0.84,
"grad_norm": 0.17840033769607544,
"learning_rate": 6.886013381524753e-07,
"loss": 0.4961,
"step": 3881
},
{
"epoch": 0.84,
"grad_norm": 0.1865067183971405,
"learning_rate": 6.86835391180567e-07,
"loss": 0.5206,
"step": 3882
},
{
"epoch": 0.84,
"grad_norm": 0.20453877747058868,
"learning_rate": 6.850715445253297e-07,
"loss": 0.5632,
"step": 3883
},
{
"epoch": 0.84,
"grad_norm": 0.15611490607261658,
"learning_rate": 6.833097990456761e-07,
"loss": 0.5682,
"step": 3884
},
{
"epoch": 0.84,
"grad_norm": 0.15531837940216064,
"learning_rate": 6.815501555994986e-07,
"loss": 0.5113,
"step": 3885
},
{
"epoch": 0.84,
"grad_norm": 0.13591976463794708,
"learning_rate": 6.797926150436618e-07,
"loss": 0.5462,
"step": 3886
},
{
"epoch": 0.84,
"grad_norm": 0.1687079221010208,
"learning_rate": 6.780371782340101e-07,
"loss": 0.5001,
"step": 3887
},
{
"epoch": 0.84,
"grad_norm": 0.16073539853096008,
"learning_rate": 6.762838460253629e-07,
"loss": 0.4732,
"step": 3888
},
{
"epoch": 0.84,
"grad_norm": 0.14205871522426605,
"learning_rate": 6.745326192715107e-07,
"loss": 0.5361,
"step": 3889
},
{
"epoch": 0.84,
"grad_norm": 0.152149498462677,
"learning_rate": 6.727834988252258e-07,
"loss": 0.4968,
"step": 3890
},
{
"epoch": 0.84,
"grad_norm": 0.1546226292848587,
"learning_rate": 6.71036485538249e-07,
"loss": 0.5439,
"step": 3891
},
{
"epoch": 0.84,
"grad_norm": 0.1516941487789154,
"learning_rate": 6.692915802612965e-07,
"loss": 0.5259,
"step": 3892
},
{
"epoch": 0.84,
"grad_norm": 0.15468570590019226,
"learning_rate": 6.675487838440608e-07,
"loss": 0.4867,
"step": 3893
},
{
"epoch": 0.84,
"grad_norm": 0.15356989204883575,
"learning_rate": 6.658080971352026e-07,
"loss": 0.4858,
"step": 3894
},
{
"epoch": 0.84,
"grad_norm": 0.16798479855060577,
"learning_rate": 6.640695209823588e-07,
"loss": 0.5147,
"step": 3895
},
{
"epoch": 0.84,
"grad_norm": 0.15462863445281982,
"learning_rate": 6.623330562321378e-07,
"loss": 0.517,
"step": 3896
},
{
"epoch": 0.84,
"grad_norm": 0.13679371774196625,
"learning_rate": 6.605987037301204e-07,
"loss": 0.535,
"step": 3897
},
{
"epoch": 0.84,
"grad_norm": 0.12895052134990692,
"learning_rate": 6.588664643208559e-07,
"loss": 0.5082,
"step": 3898
},
{
"epoch": 0.84,
"grad_norm": 0.16611763834953308,
"learning_rate": 6.571363388478686e-07,
"loss": 0.495,
"step": 3899
},
{
"epoch": 0.84,
"grad_norm": 0.12473352998495102,
"learning_rate": 6.554083281536516e-07,
"loss": 0.5251,
"step": 3900
},
{
"epoch": 0.84,
"grad_norm": 0.15122053027153015,
"learning_rate": 6.53682433079667e-07,
"loss": 0.4709,
"step": 3901
},
{
"epoch": 0.84,
"grad_norm": 0.15693899989128113,
"learning_rate": 6.519586544663481e-07,
"loss": 0.4572,
"step": 3902
},
{
"epoch": 0.84,
"grad_norm": 0.18545718491077423,
"learning_rate": 6.502369931530977e-07,
"loss": 0.5047,
"step": 3903
},
{
"epoch": 0.84,
"grad_norm": 0.20683102309703827,
"learning_rate": 6.485174499782876e-07,
"loss": 0.476,
"step": 3904
},
{
"epoch": 0.84,
"grad_norm": 0.12381558865308762,
"learning_rate": 6.468000257792583e-07,
"loss": 0.5589,
"step": 3905
},
{
"epoch": 0.84,
"grad_norm": 0.22837506234645844,
"learning_rate": 6.450847213923162e-07,
"loss": 0.512,
"step": 3906
},
{
"epoch": 0.84,
"grad_norm": 0.15001285076141357,
"learning_rate": 6.433715376527383e-07,
"loss": 0.4689,
"step": 3907
},
{
"epoch": 0.84,
"grad_norm": 0.1989048719406128,
"learning_rate": 6.416604753947675e-07,
"loss": 0.4834,
"step": 3908
},
{
"epoch": 0.84,
"grad_norm": 0.23922041058540344,
"learning_rate": 6.399515354516139e-07,
"loss": 0.5496,
"step": 3909
},
{
"epoch": 0.84,
"grad_norm": 0.15358422696590424,
"learning_rate": 6.382447186554553e-07,
"loss": 0.5441,
"step": 3910
},
{
"epoch": 0.84,
"grad_norm": 0.19341875612735748,
"learning_rate": 6.365400258374327e-07,
"loss": 0.5052,
"step": 3911
},
{
"epoch": 0.84,
"grad_norm": 0.1362185925245285,
"learning_rate": 6.348374578276567e-07,
"loss": 0.5318,
"step": 3912
},
{
"epoch": 0.84,
"grad_norm": 0.172585591673851,
"learning_rate": 6.331370154551986e-07,
"loss": 0.5385,
"step": 3913
},
{
"epoch": 0.84,
"grad_norm": 0.16115382313728333,
"learning_rate": 6.314386995480987e-07,
"loss": 0.5018,
"step": 3914
},
{
"epoch": 0.84,
"grad_norm": 0.14296384155750275,
"learning_rate": 6.297425109333605e-07,
"loss": 0.5275,
"step": 3915
},
{
"epoch": 0.84,
"grad_norm": 0.16052164137363434,
"learning_rate": 6.280484504369505e-07,
"loss": 0.5066,
"step": 3916
},
{
"epoch": 0.84,
"grad_norm": 0.1424168050289154,
"learning_rate": 6.263565188838011e-07,
"loss": 0.4944,
"step": 3917
},
{
"epoch": 0.84,
"grad_norm": 0.1381656974554062,
"learning_rate": 6.246667170978049e-07,
"loss": 0.5041,
"step": 3918
},
{
"epoch": 0.84,
"grad_norm": 0.1506141573190689,
"learning_rate": 6.229790459018203e-07,
"loss": 0.5599,
"step": 3919
},
{
"epoch": 0.84,
"grad_norm": 0.142376109957695,
"learning_rate": 6.212935061176667e-07,
"loss": 0.5435,
"step": 3920
},
{
"epoch": 0.84,
"grad_norm": 0.1417161524295807,
"learning_rate": 6.196100985661258e-07,
"loss": 0.5334,
"step": 3921
},
{
"epoch": 0.84,
"grad_norm": 0.16186197102069855,
"learning_rate": 6.179288240669429e-07,
"loss": 0.5081,
"step": 3922
},
{
"epoch": 0.85,
"grad_norm": 0.18143245577812195,
"learning_rate": 6.162496834388204e-07,
"loss": 0.5346,
"step": 3923
},
{
"epoch": 0.85,
"grad_norm": 0.16008998453617096,
"learning_rate": 6.14572677499426e-07,
"loss": 0.5284,
"step": 3924
},
{
"epoch": 0.85,
"grad_norm": 0.1312318742275238,
"learning_rate": 6.12897807065384e-07,
"loss": 0.5083,
"step": 3925
},
{
"epoch": 0.85,
"grad_norm": 0.15271489322185516,
"learning_rate": 6.112250729522823e-07,
"loss": 0.5676,
"step": 3926
},
{
"epoch": 0.85,
"grad_norm": 0.1383863240480423,
"learning_rate": 6.095544759746663e-07,
"loss": 0.472,
"step": 3927
},
{
"epoch": 0.85,
"grad_norm": 0.14017315208911896,
"learning_rate": 6.078860169460416e-07,
"loss": 0.4941,
"step": 3928
},
{
"epoch": 0.85,
"grad_norm": 0.1404963880777359,
"learning_rate": 6.062196966788736e-07,
"loss": 0.5128,
"step": 3929
},
{
"epoch": 0.85,
"grad_norm": 0.1775158941745758,
"learning_rate": 6.045555159845828e-07,
"loss": 0.5326,
"step": 3930
},
{
"epoch": 0.85,
"grad_norm": 0.13232095539569855,
"learning_rate": 6.028934756735516e-07,
"loss": 0.4828,
"step": 3931
},
{
"epoch": 0.85,
"grad_norm": 0.14679361879825592,
"learning_rate": 6.012335765551186e-07,
"loss": 0.5059,
"step": 3932
},
{
"epoch": 0.85,
"grad_norm": 0.16096676886081696,
"learning_rate": 5.995758194375794e-07,
"loss": 0.4844,
"step": 3933
},
{
"epoch": 0.85,
"grad_norm": 0.17318318784236908,
"learning_rate": 5.979202051281891e-07,
"loss": 0.506,
"step": 3934
},
{
"epoch": 0.85,
"grad_norm": 0.1557616889476776,
"learning_rate": 5.962667344331535e-07,
"loss": 0.545,
"step": 3935
},
{
"epoch": 0.85,
"grad_norm": 0.1732773780822754,
"learning_rate": 5.946154081576411e-07,
"loss": 0.5198,
"step": 3936
},
{
"epoch": 0.85,
"grad_norm": 0.14775021374225616,
"learning_rate": 5.929662271057729e-07,
"loss": 0.5117,
"step": 3937
},
{
"epoch": 0.85,
"grad_norm": 0.2609883248806,
"learning_rate": 5.913191920806244e-07,
"loss": 0.495,
"step": 3938
},
{
"epoch": 0.85,
"grad_norm": 0.20081481337547302,
"learning_rate": 5.896743038842279e-07,
"loss": 0.51,
"step": 3939
},
{
"epoch": 0.85,
"grad_norm": 0.17543698847293854,
"learning_rate": 5.880315633175704e-07,
"loss": 0.5292,
"step": 3940
},
{
"epoch": 0.85,
"grad_norm": 0.15874987840652466,
"learning_rate": 5.863909711805915e-07,
"loss": 0.4689,
"step": 3941
},
{
"epoch": 0.85,
"grad_norm": 0.12618225812911987,
"learning_rate": 5.847525282721883e-07,
"loss": 0.4914,
"step": 3942
},
{
"epoch": 0.85,
"grad_norm": 0.12914496660232544,
"learning_rate": 5.831162353902048e-07,
"loss": 0.5027,
"step": 3943
},
{
"epoch": 0.85,
"grad_norm": 0.13037589192390442,
"learning_rate": 5.814820933314446e-07,
"loss": 0.5111,
"step": 3944
},
{
"epoch": 0.85,
"grad_norm": 0.14492201805114746,
"learning_rate": 5.798501028916587e-07,
"loss": 0.5404,
"step": 3945
},
{
"epoch": 0.85,
"grad_norm": 0.17597924172878265,
"learning_rate": 5.78220264865555e-07,
"loss": 0.4962,
"step": 3946
},
{
"epoch": 0.85,
"grad_norm": 0.1367659866809845,
"learning_rate": 5.76592580046792e-07,
"loss": 0.4624,
"step": 3947
},
{
"epoch": 0.85,
"grad_norm": 0.16057761013507843,
"learning_rate": 5.749670492279757e-07,
"loss": 0.4985,
"step": 3948
},
{
"epoch": 0.85,
"grad_norm": 0.1518482267856598,
"learning_rate": 5.733436732006692e-07,
"loss": 0.512,
"step": 3949
},
{
"epoch": 0.85,
"grad_norm": 0.14091022312641144,
"learning_rate": 5.717224527553811e-07,
"loss": 0.5218,
"step": 3950
},
{
"epoch": 0.85,
"grad_norm": 0.13686485588550568,
"learning_rate": 5.701033886815738e-07,
"loss": 0.4916,
"step": 3951
},
{
"epoch": 0.85,
"grad_norm": 0.13204288482666016,
"learning_rate": 5.684864817676583e-07,
"loss": 0.495,
"step": 3952
},
{
"epoch": 0.85,
"grad_norm": 0.2482197880744934,
"learning_rate": 5.668717328009954e-07,
"loss": 0.5075,
"step": 3953
},
{
"epoch": 0.85,
"grad_norm": 0.14586390554904938,
"learning_rate": 5.65259142567896e-07,
"loss": 0.504,
"step": 3954
},
{
"epoch": 0.85,
"grad_norm": 0.15525588393211365,
"learning_rate": 5.636487118536171e-07,
"loss": 0.5429,
"step": 3955
},
{
"epoch": 0.85,
"grad_norm": 0.1539800614118576,
"learning_rate": 5.620404414423674e-07,
"loss": 0.5228,
"step": 3956
},
{
"epoch": 0.85,
"grad_norm": 0.16243867576122284,
"learning_rate": 5.604343321173006e-07,
"loss": 0.5141,
"step": 3957
},
{
"epoch": 0.85,
"grad_norm": 0.17166343331336975,
"learning_rate": 5.588303846605187e-07,
"loss": 0.5474,
"step": 3958
},
{
"epoch": 0.85,
"grad_norm": 0.1559562087059021,
"learning_rate": 5.572285998530758e-07,
"loss": 0.4877,
"step": 3959
},
{
"epoch": 0.85,
"grad_norm": 0.12732228636741638,
"learning_rate": 5.556289784749653e-07,
"loss": 0.4967,
"step": 3960
},
{
"epoch": 0.85,
"grad_norm": 0.15208043158054352,
"learning_rate": 5.540315213051323e-07,
"loss": 0.5032,
"step": 3961
},
{
"epoch": 0.85,
"grad_norm": 0.14678843319416046,
"learning_rate": 5.524362291214652e-07,
"loss": 0.5706,
"step": 3962
},
{
"epoch": 0.85,
"grad_norm": 0.1634489744901657,
"learning_rate": 5.508431027008004e-07,
"loss": 0.4835,
"step": 3963
},
{
"epoch": 0.85,
"grad_norm": 0.13023607432842255,
"learning_rate": 5.492521428189179e-07,
"loss": 0.491,
"step": 3964
},
{
"epoch": 0.85,
"grad_norm": 0.13974343240261078,
"learning_rate": 5.476633502505436e-07,
"loss": 0.5619,
"step": 3965
},
{
"epoch": 0.85,
"grad_norm": 0.17008721828460693,
"learning_rate": 5.460767257693489e-07,
"loss": 0.4755,
"step": 3966
},
{
"epoch": 0.85,
"grad_norm": 0.20171983540058136,
"learning_rate": 5.444922701479465e-07,
"loss": 0.5274,
"step": 3967
},
{
"epoch": 0.85,
"grad_norm": 0.24001158773899078,
"learning_rate": 5.429099841578966e-07,
"loss": 0.5145,
"step": 3968
},
{
"epoch": 0.85,
"grad_norm": 0.15838083624839783,
"learning_rate": 5.413298685697005e-07,
"loss": 0.4835,
"step": 3969
},
{
"epoch": 0.86,
"grad_norm": 0.13740849494934082,
"learning_rate": 5.397519241528026e-07,
"loss": 0.4933,
"step": 3970
},
{
"epoch": 0.86,
"grad_norm": 0.15910400450229645,
"learning_rate": 5.381761516755907e-07,
"loss": 0.5559,
"step": 3971
},
{
"epoch": 0.86,
"grad_norm": 0.1526496410369873,
"learning_rate": 5.366025519053958e-07,
"loss": 0.5526,
"step": 3972
},
{
"epoch": 0.86,
"grad_norm": 0.14740879833698273,
"learning_rate": 5.350311256084895e-07,
"loss": 0.5,
"step": 3973
},
{
"epoch": 0.86,
"grad_norm": 0.12307767570018768,
"learning_rate": 5.334618735500868e-07,
"loss": 0.544,
"step": 3974
},
{
"epoch": 0.86,
"grad_norm": 0.1413116753101349,
"learning_rate": 5.3189479649434e-07,
"loss": 0.5074,
"step": 3975
},
{
"epoch": 0.86,
"grad_norm": 0.1412649154663086,
"learning_rate": 5.303298952043473e-07,
"loss": 0.5446,
"step": 3976
},
{
"epoch": 0.86,
"grad_norm": 0.17468446493148804,
"learning_rate": 5.287671704421437e-07,
"loss": 0.5217,
"step": 3977
},
{
"epoch": 0.86,
"grad_norm": 0.15514497458934784,
"learning_rate": 5.272066229687078e-07,
"loss": 0.542,
"step": 3978
},
{
"epoch": 0.86,
"grad_norm": 0.16648997366428375,
"learning_rate": 5.256482535439528e-07,
"loss": 0.4755,
"step": 3979
},
{
"epoch": 0.86,
"grad_norm": 0.14344756305217743,
"learning_rate": 5.24092062926736e-07,
"loss": 0.5393,
"step": 3980
},
{
"epoch": 0.86,
"grad_norm": 0.14399173855781555,
"learning_rate": 5.225380518748529e-07,
"loss": 0.4944,
"step": 3981
},
{
"epoch": 0.86,
"grad_norm": 0.16422103345394135,
"learning_rate": 5.209862211450351e-07,
"loss": 0.5151,
"step": 3982
},
{
"epoch": 0.86,
"grad_norm": 0.20136775076389313,
"learning_rate": 5.19436571492955e-07,
"loss": 0.4696,
"step": 3983
},
{
"epoch": 0.86,
"grad_norm": 0.16965395212173462,
"learning_rate": 5.17889103673222e-07,
"loss": 0.5225,
"step": 3984
},
{
"epoch": 0.86,
"grad_norm": 0.13450326025485992,
"learning_rate": 5.163438184393826e-07,
"loss": 0.5,
"step": 3985
},
{
"epoch": 0.86,
"grad_norm": 0.16451282799243927,
"learning_rate": 5.148007165439234e-07,
"loss": 0.4973,
"step": 3986
},
{
"epoch": 0.86,
"grad_norm": 0.13875506818294525,
"learning_rate": 5.13259798738262e-07,
"loss": 0.4976,
"step": 3987
},
{
"epoch": 0.86,
"grad_norm": 0.13691715896129608,
"learning_rate": 5.117210657727589e-07,
"loss": 0.5844,
"step": 3988
},
{
"epoch": 0.86,
"grad_norm": 0.16079024970531464,
"learning_rate": 5.101845183967041e-07,
"loss": 0.5084,
"step": 3989
},
{
"epoch": 0.86,
"grad_norm": 0.1576671302318573,
"learning_rate": 5.086501573583302e-07,
"loss": 0.5307,
"step": 3990
},
{
"epoch": 0.86,
"grad_norm": 0.14902909100055695,
"learning_rate": 5.071179834048018e-07,
"loss": 0.5562,
"step": 3991
},
{
"epoch": 0.86,
"grad_norm": 0.17067904770374298,
"learning_rate": 5.055879972822164e-07,
"loss": 0.5427,
"step": 3992
},
{
"epoch": 0.86,
"grad_norm": 0.23107197880744934,
"learning_rate": 5.040601997356098e-07,
"loss": 0.5028,
"step": 3993
},
{
"epoch": 0.86,
"grad_norm": 0.15796354413032532,
"learning_rate": 5.025345915089497e-07,
"loss": 0.5006,
"step": 3994
},
{
"epoch": 0.86,
"grad_norm": 0.15521222352981567,
"learning_rate": 5.010111733451384e-07,
"loss": 0.5438,
"step": 3995
},
{
"epoch": 0.86,
"grad_norm": 0.1400623768568039,
"learning_rate": 4.994899459860125e-07,
"loss": 0.5441,
"step": 3996
},
{
"epoch": 0.86,
"grad_norm": 0.15729603171348572,
"learning_rate": 4.979709101723407e-07,
"loss": 0.5244,
"step": 3997
},
{
"epoch": 0.86,
"grad_norm": 0.17316539585590363,
"learning_rate": 4.964540666438261e-07,
"loss": 0.5038,
"step": 3998
},
{
"epoch": 0.86,
"grad_norm": 0.16760565340518951,
"learning_rate": 4.949394161391013e-07,
"loss": 0.5128,
"step": 3999
},
{
"epoch": 0.86,
"grad_norm": 0.13866716623306274,
"learning_rate": 4.934269593957336e-07,
"loss": 0.5033,
"step": 4000
},
{
"epoch": 0.86,
"grad_norm": 0.17374561727046967,
"learning_rate": 4.919166971502215e-07,
"loss": 0.4985,
"step": 4001
},
{
"epoch": 0.86,
"grad_norm": 0.16311132907867432,
"learning_rate": 4.90408630137994e-07,
"loss": 0.5016,
"step": 4002
},
{
"epoch": 0.86,
"grad_norm": 0.15572021901607513,
"learning_rate": 4.889027590934131e-07,
"loss": 0.5121,
"step": 4003
},
{
"epoch": 0.86,
"grad_norm": 0.20856572687625885,
"learning_rate": 4.873990847497684e-07,
"loss": 0.5021,
"step": 4004
},
{
"epoch": 0.86,
"grad_norm": 0.15273533761501312,
"learning_rate": 4.85897607839283e-07,
"loss": 0.5781,
"step": 4005
},
{
"epoch": 0.86,
"grad_norm": 0.14332985877990723,
"learning_rate": 4.843983290931064e-07,
"loss": 0.4704,
"step": 4006
},
{
"epoch": 0.86,
"grad_norm": 0.17221957445144653,
"learning_rate": 4.829012492413215e-07,
"loss": 0.4858,
"step": 4007
},
{
"epoch": 0.86,
"grad_norm": 0.14145652949810028,
"learning_rate": 4.814063690129378e-07,
"loss": 0.5182,
"step": 4008
},
{
"epoch": 0.86,
"grad_norm": 0.15986113250255585,
"learning_rate": 4.799136891358952e-07,
"loss": 0.5424,
"step": 4009
},
{
"epoch": 0.86,
"grad_norm": 0.1356787085533142,
"learning_rate": 4.784232103370617e-07,
"loss": 0.494,
"step": 4010
},
{
"epoch": 0.86,
"grad_norm": 0.19140973687171936,
"learning_rate": 4.769349333422324e-07,
"loss": 0.4956,
"step": 4011
},
{
"epoch": 0.86,
"grad_norm": 0.14601151645183563,
"learning_rate": 4.7544885887613136e-07,
"loss": 0.5142,
"step": 4012
},
{
"epoch": 0.86,
"grad_norm": 0.16945038735866547,
"learning_rate": 4.739649876624108e-07,
"loss": 0.5068,
"step": 4013
},
{
"epoch": 0.86,
"grad_norm": 0.1639741212129593,
"learning_rate": 4.724833204236462e-07,
"loss": 0.4829,
"step": 4014
},
{
"epoch": 0.86,
"grad_norm": 0.21183674037456512,
"learning_rate": 4.710038578813469e-07,
"loss": 0.4902,
"step": 4015
},
{
"epoch": 0.87,
"grad_norm": 0.167417973279953,
"learning_rate": 4.695266007559407e-07,
"loss": 0.504,
"step": 4016
},
{
"epoch": 0.87,
"grad_norm": 0.18118150532245636,
"learning_rate": 4.6805154976678755e-07,
"loss": 0.5233,
"step": 4017
},
{
"epoch": 0.87,
"grad_norm": 0.16984857618808746,
"learning_rate": 4.6657870563217076e-07,
"loss": 0.5051,
"step": 4018
},
{
"epoch": 0.87,
"grad_norm": 0.17123106122016907,
"learning_rate": 4.651080690692972e-07,
"loss": 0.5429,
"step": 4019
},
{
"epoch": 0.87,
"grad_norm": 0.15946775674819946,
"learning_rate": 4.6363964079430166e-07,
"loss": 0.5523,
"step": 4020
},
{
"epoch": 0.87,
"grad_norm": 0.13110215961933136,
"learning_rate": 4.6217342152224233e-07,
"loss": 0.5525,
"step": 4021
},
{
"epoch": 0.87,
"grad_norm": 0.1535872519016266,
"learning_rate": 4.6070941196710186e-07,
"loss": 0.5344,
"step": 4022
},
{
"epoch": 0.87,
"grad_norm": 0.15114997327327728,
"learning_rate": 4.5924761284178834e-07,
"loss": 0.4776,
"step": 4023
},
{
"epoch": 0.87,
"grad_norm": 0.15840767323970795,
"learning_rate": 4.5778802485812956e-07,
"loss": 0.506,
"step": 4024
},
{
"epoch": 0.87,
"grad_norm": 0.1840353012084961,
"learning_rate": 4.5633064872688093e-07,
"loss": 0.5216,
"step": 4025
},
{
"epoch": 0.87,
"grad_norm": 0.18357300758361816,
"learning_rate": 4.548754851577175e-07,
"loss": 0.5406,
"step": 4026
},
{
"epoch": 0.87,
"grad_norm": 0.16347016394138336,
"learning_rate": 4.5342253485923803e-07,
"loss": 0.5085,
"step": 4027
},
{
"epoch": 0.87,
"grad_norm": 0.2056354433298111,
"learning_rate": 4.5197179853896654e-07,
"loss": 0.518,
"step": 4028
},
{
"epoch": 0.87,
"grad_norm": 0.1330898255109787,
"learning_rate": 4.505232769033435e-07,
"loss": 0.5138,
"step": 4029
},
{
"epoch": 0.87,
"grad_norm": 0.16567635536193848,
"learning_rate": 4.4907697065773523e-07,
"loss": 0.5258,
"step": 4030
},
{
"epoch": 0.87,
"grad_norm": 0.1845930814743042,
"learning_rate": 4.476328805064262e-07,
"loss": 0.5277,
"step": 4031
},
{
"epoch": 0.87,
"grad_norm": 0.1463019847869873,
"learning_rate": 4.4619100715262374e-07,
"loss": 0.4919,
"step": 4032
},
{
"epoch": 0.87,
"grad_norm": 0.12273728102445602,
"learning_rate": 4.447513512984558e-07,
"loss": 0.4665,
"step": 4033
},
{
"epoch": 0.87,
"grad_norm": 0.1603401154279709,
"learning_rate": 4.4331391364496934e-07,
"loss": 0.517,
"step": 4034
},
{
"epoch": 0.87,
"grad_norm": 0.15330933034420013,
"learning_rate": 4.4187869489213275e-07,
"loss": 0.5976,
"step": 4035
},
{
"epoch": 0.87,
"grad_norm": 0.21303099393844604,
"learning_rate": 4.404456957388309e-07,
"loss": 0.5608,
"step": 4036
},
{
"epoch": 0.87,
"grad_norm": 0.15875820815563202,
"learning_rate": 4.3901491688287113e-07,
"loss": 0.5394,
"step": 4037
},
{
"epoch": 0.87,
"grad_norm": 0.18736515939235687,
"learning_rate": 4.375863590209778e-07,
"loss": 0.4804,
"step": 4038
},
{
"epoch": 0.87,
"grad_norm": 0.21394529938697815,
"learning_rate": 4.3616002284879333e-07,
"loss": 0.5041,
"step": 4039
},
{
"epoch": 0.87,
"grad_norm": 0.18619798123836517,
"learning_rate": 4.3473590906088046e-07,
"loss": 0.5027,
"step": 4040
},
{
"epoch": 0.87,
"grad_norm": 0.16709107160568237,
"learning_rate": 4.3331401835071783e-07,
"loss": 0.4971,
"step": 4041
},
{
"epoch": 0.87,
"grad_norm": 0.1601034700870514,
"learning_rate": 4.3189435141070324e-07,
"loss": 0.5241,
"step": 4042
},
{
"epoch": 0.87,
"grad_norm": 0.15669238567352295,
"learning_rate": 4.304769089321481e-07,
"loss": 0.5291,
"step": 4043
},
{
"epoch": 0.87,
"grad_norm": 0.14634265005588531,
"learning_rate": 4.2906169160528424e-07,
"loss": 0.5253,
"step": 4044
},
{
"epoch": 0.87,
"grad_norm": 0.1932663768529892,
"learning_rate": 4.276487001192592e-07,
"loss": 0.5096,
"step": 4045
},
{
"epoch": 0.87,
"grad_norm": 0.12650729715824127,
"learning_rate": 4.262379351621354e-07,
"loss": 0.5037,
"step": 4046
},
{
"epoch": 0.87,
"grad_norm": 0.1783479005098343,
"learning_rate": 4.248293974208928e-07,
"loss": 0.5197,
"step": 4047
},
{
"epoch": 0.87,
"grad_norm": 0.17106756567955017,
"learning_rate": 4.2342308758142437e-07,
"loss": 0.4908,
"step": 4048
},
{
"epoch": 0.87,
"grad_norm": 0.1578291952610016,
"learning_rate": 4.220190063285401e-07,
"loss": 0.5028,
"step": 4049
},
{
"epoch": 0.87,
"grad_norm": 0.17856548726558685,
"learning_rate": 4.2061715434596475e-07,
"loss": 0.4998,
"step": 4050
},
{
"epoch": 0.87,
"grad_norm": 0.19411097466945648,
"learning_rate": 4.192175323163361e-07,
"loss": 0.5383,
"step": 4051
},
{
"epoch": 0.87,
"grad_norm": 0.1397572010755539,
"learning_rate": 4.1782014092120735e-07,
"loss": 0.4779,
"step": 4052
},
{
"epoch": 0.87,
"grad_norm": 0.12479076534509659,
"learning_rate": 4.164249808410459e-07,
"loss": 0.498,
"step": 4053
},
{
"epoch": 0.87,
"grad_norm": 0.13633649051189423,
"learning_rate": 4.150320527552304e-07,
"loss": 0.5257,
"step": 4054
},
{
"epoch": 0.87,
"grad_norm": 0.16726909577846527,
"learning_rate": 4.1364135734205556e-07,
"loss": 0.4955,
"step": 4055
},
{
"epoch": 0.87,
"grad_norm": 0.1693604290485382,
"learning_rate": 4.122528952787258e-07,
"loss": 0.5903,
"step": 4056
},
{
"epoch": 0.87,
"grad_norm": 0.13616541028022766,
"learning_rate": 4.1086666724136024e-07,
"loss": 0.4837,
"step": 4057
},
{
"epoch": 0.87,
"grad_norm": 0.14842045307159424,
"learning_rate": 4.0948267390498953e-07,
"loss": 0.4777,
"step": 4058
},
{
"epoch": 0.87,
"grad_norm": 0.15691286325454712,
"learning_rate": 4.0810091594355674e-07,
"loss": 0.4684,
"step": 4059
},
{
"epoch": 0.87,
"grad_norm": 0.20302332937717438,
"learning_rate": 4.067213940299136e-07,
"loss": 0.5461,
"step": 4060
},
{
"epoch": 0.87,
"grad_norm": 0.1701618880033493,
"learning_rate": 4.0534410883582673e-07,
"loss": 0.5253,
"step": 4061
},
{
"epoch": 0.88,
"grad_norm": 0.16087806224822998,
"learning_rate": 4.0396906103197244e-07,
"loss": 0.5728,
"step": 4062
},
{
"epoch": 0.88,
"grad_norm": 0.1731209009885788,
"learning_rate": 4.02596251287935e-07,
"loss": 0.4793,
"step": 4063
},
{
"epoch": 0.88,
"grad_norm": 0.15619364380836487,
"learning_rate": 4.01225680272212e-07,
"loss": 0.5675,
"step": 4064
},
{
"epoch": 0.88,
"grad_norm": 0.14686357975006104,
"learning_rate": 3.998573486522095e-07,
"loss": 0.5241,
"step": 4065
},
{
"epoch": 0.88,
"grad_norm": 0.14786110818386078,
"learning_rate": 3.984912570942434e-07,
"loss": 0.5098,
"step": 4066
},
{
"epoch": 0.88,
"grad_norm": 0.1765190064907074,
"learning_rate": 3.9712740626354e-07,
"loss": 0.6106,
"step": 4067
},
{
"epoch": 0.88,
"grad_norm": 0.14524182677268982,
"learning_rate": 3.9576579682423066e-07,
"loss": 0.5239,
"step": 4068
},
{
"epoch": 0.88,
"grad_norm": 0.18796804547309875,
"learning_rate": 3.9440642943936013e-07,
"loss": 0.4934,
"step": 4069
},
{
"epoch": 0.88,
"grad_norm": 0.13147200644016266,
"learning_rate": 3.930493047708761e-07,
"loss": 0.5417,
"step": 4070
},
{
"epoch": 0.88,
"grad_norm": 0.137882798910141,
"learning_rate": 3.916944234796399e-07,
"loss": 0.4724,
"step": 4071
},
{
"epoch": 0.88,
"grad_norm": 0.1864192932844162,
"learning_rate": 3.903417862254172e-07,
"loss": 0.4951,
"step": 4072
},
{
"epoch": 0.88,
"grad_norm": 0.14649604260921478,
"learning_rate": 3.8899139366687985e-07,
"loss": 0.5297,
"step": 4073
},
{
"epoch": 0.88,
"grad_norm": 0.19774487614631653,
"learning_rate": 3.876432464616103e-07,
"loss": 0.5174,
"step": 4074
},
{
"epoch": 0.88,
"grad_norm": 0.12834720313549042,
"learning_rate": 3.862973452660929e-07,
"loss": 0.523,
"step": 4075
},
{
"epoch": 0.88,
"grad_norm": 0.1609206348657608,
"learning_rate": 3.8495369073572266e-07,
"loss": 0.5635,
"step": 4076
},
{
"epoch": 0.88,
"grad_norm": 0.1672678291797638,
"learning_rate": 3.8361228352479795e-07,
"loss": 0.478,
"step": 4077
},
{
"epoch": 0.88,
"grad_norm": 0.13725100457668304,
"learning_rate": 3.822731242865235e-07,
"loss": 0.5276,
"step": 4078
},
{
"epoch": 0.88,
"grad_norm": 0.1619109809398651,
"learning_rate": 3.8093621367301103e-07,
"loss": 0.5497,
"step": 4079
},
{
"epoch": 0.88,
"grad_norm": 0.18122999370098114,
"learning_rate": 3.7960155233527364e-07,
"loss": 0.5882,
"step": 4080
},
{
"epoch": 0.88,
"grad_norm": 0.15119299292564392,
"learning_rate": 3.782691409232325e-07,
"loss": 0.4459,
"step": 4081
},
{
"epoch": 0.88,
"grad_norm": 0.164114847779274,
"learning_rate": 3.7693898008571205e-07,
"loss": 0.525,
"step": 4082
},
{
"epoch": 0.88,
"grad_norm": 0.1447734236717224,
"learning_rate": 3.75611070470438e-07,
"loss": 0.542,
"step": 4083
},
{
"epoch": 0.88,
"grad_norm": 0.16693483293056488,
"learning_rate": 3.742854127240464e-07,
"loss": 0.5254,
"step": 4084
},
{
"epoch": 0.88,
"grad_norm": 0.1440124213695526,
"learning_rate": 3.7296200749207034e-07,
"loss": 0.4841,
"step": 4085
},
{
"epoch": 0.88,
"grad_norm": 0.14543622732162476,
"learning_rate": 3.7164085541894937e-07,
"loss": 0.5613,
"step": 4086
},
{
"epoch": 0.88,
"grad_norm": 0.16933149099349976,
"learning_rate": 3.703219571480249e-07,
"loss": 0.5304,
"step": 4087
},
{
"epoch": 0.88,
"grad_norm": 0.14789710938930511,
"learning_rate": 3.690053133215399e-07,
"loss": 0.5256,
"step": 4088
},
{
"epoch": 0.88,
"grad_norm": 0.16581717133522034,
"learning_rate": 3.676909245806415e-07,
"loss": 0.5014,
"step": 4089
},
{
"epoch": 0.88,
"grad_norm": 0.13003475964069366,
"learning_rate": 3.663787915653777e-07,
"loss": 0.5366,
"step": 4090
},
{
"epoch": 0.88,
"grad_norm": 0.14335590600967407,
"learning_rate": 3.650689149146991e-07,
"loss": 0.5642,
"step": 4091
},
{
"epoch": 0.88,
"grad_norm": 0.20606780052185059,
"learning_rate": 3.6376129526645376e-07,
"loss": 0.5484,
"step": 4092
},
{
"epoch": 0.88,
"grad_norm": 0.1340981125831604,
"learning_rate": 3.624559332573957e-07,
"loss": 0.4645,
"step": 4093
},
{
"epoch": 0.88,
"grad_norm": 0.14133349061012268,
"learning_rate": 3.6115282952317807e-07,
"loss": 0.4575,
"step": 4094
},
{
"epoch": 0.88,
"grad_norm": 0.16861362755298615,
"learning_rate": 3.598519846983511e-07,
"loss": 0.4783,
"step": 4095
},
{
"epoch": 0.88,
"grad_norm": 0.126511812210083,
"learning_rate": 3.5855339941636867e-07,
"loss": 0.4925,
"step": 4096
},
{
"epoch": 0.88,
"grad_norm": 0.17741841077804565,
"learning_rate": 3.572570743095838e-07,
"loss": 0.4844,
"step": 4097
},
{
"epoch": 0.88,
"grad_norm": 0.13794460892677307,
"learning_rate": 3.5596301000924815e-07,
"loss": 0.5503,
"step": 4098
},
{
"epoch": 0.88,
"grad_norm": 0.14488175511360168,
"learning_rate": 3.546712071455127e-07,
"loss": 0.4982,
"step": 4099
},
{
"epoch": 0.88,
"grad_norm": 0.2083345204591751,
"learning_rate": 3.533816663474271e-07,
"loss": 0.4913,
"step": 4100
},
{
"epoch": 0.88,
"grad_norm": 0.15425090491771698,
"learning_rate": 3.5209438824293896e-07,
"loss": 0.5406,
"step": 4101
},
{
"epoch": 0.88,
"grad_norm": 0.19911810755729675,
"learning_rate": 3.508093734588952e-07,
"loss": 0.4975,
"step": 4102
},
{
"epoch": 0.88,
"grad_norm": 0.14716565608978271,
"learning_rate": 3.4952662262104033e-07,
"loss": 0.4834,
"step": 4103
},
{
"epoch": 0.88,
"grad_norm": 0.18182729184627533,
"learning_rate": 3.482461363540163e-07,
"loss": 0.5785,
"step": 4104
},
{
"epoch": 0.88,
"grad_norm": 0.16187834739685059,
"learning_rate": 3.46967915281361e-07,
"loss": 0.5109,
"step": 4105
},
{
"epoch": 0.88,
"grad_norm": 0.13375143706798553,
"learning_rate": 3.456919600255126e-07,
"loss": 0.4686,
"step": 4106
},
{
"epoch": 0.88,
"grad_norm": 0.16278614103794098,
"learning_rate": 3.4441827120780147e-07,
"loss": 0.5005,
"step": 4107
},
{
"epoch": 0.88,
"grad_norm": 0.2082134485244751,
"learning_rate": 3.4314684944845747e-07,
"loss": 0.4992,
"step": 4108
},
{
"epoch": 0.89,
"grad_norm": 0.134648859500885,
"learning_rate": 3.4187769536660533e-07,
"loss": 0.4795,
"step": 4109
},
{
"epoch": 0.89,
"grad_norm": 0.1557423323392868,
"learning_rate": 3.406108095802668e-07,
"loss": 0.5266,
"step": 4110
},
{
"epoch": 0.89,
"grad_norm": 0.15993043780326843,
"learning_rate": 3.393461927063585e-07,
"loss": 0.4967,
"step": 4111
},
{
"epoch": 0.89,
"grad_norm": 0.12335589528083801,
"learning_rate": 3.3808384536068997e-07,
"loss": 0.5044,
"step": 4112
},
{
"epoch": 0.89,
"grad_norm": 0.1581617146730423,
"learning_rate": 3.3682376815796834e-07,
"loss": 0.5087,
"step": 4113
},
{
"epoch": 0.89,
"grad_norm": 0.1484224498271942,
"learning_rate": 3.3556596171179455e-07,
"loss": 0.4655,
"step": 4114
},
{
"epoch": 0.89,
"grad_norm": 0.16247932612895966,
"learning_rate": 3.343104266346636e-07,
"loss": 0.4964,
"step": 4115
},
{
"epoch": 0.89,
"grad_norm": 0.18105369806289673,
"learning_rate": 3.3305716353796537e-07,
"loss": 0.5231,
"step": 4116
},
{
"epoch": 0.89,
"grad_norm": 0.16214075684547424,
"learning_rate": 3.3180617303198046e-07,
"loss": 0.546,
"step": 4117
},
{
"epoch": 0.89,
"grad_norm": 0.16236190497875214,
"learning_rate": 3.305574557258867e-07,
"loss": 0.4751,
"step": 4118
},
{
"epoch": 0.89,
"grad_norm": 0.1777781993150711,
"learning_rate": 3.2931101222775154e-07,
"loss": 0.5563,
"step": 4119
},
{
"epoch": 0.89,
"grad_norm": 0.17919768393039703,
"learning_rate": 3.2806684314453774e-07,
"loss": 0.4581,
"step": 4120
},
{
"epoch": 0.89,
"grad_norm": 0.16179294884204865,
"learning_rate": 3.2682494908209906e-07,
"loss": 0.5197,
"step": 4121
},
{
"epoch": 0.89,
"grad_norm": 0.14991389214992523,
"learning_rate": 3.255853306451823e-07,
"loss": 0.5574,
"step": 4122
},
{
"epoch": 0.89,
"grad_norm": 0.22062784433364868,
"learning_rate": 3.243479884374262e-07,
"loss": 0.5563,
"step": 4123
},
{
"epoch": 0.89,
"grad_norm": 0.15176159143447876,
"learning_rate": 3.2311292306135944e-07,
"loss": 0.4785,
"step": 4124
},
{
"epoch": 0.89,
"grad_norm": 0.1553657054901123,
"learning_rate": 3.2188013511840365e-07,
"loss": 0.5524,
"step": 4125
},
{
"epoch": 0.89,
"grad_norm": 0.2217596471309662,
"learning_rate": 3.2064962520887146e-07,
"loss": 0.4976,
"step": 4126
},
{
"epoch": 0.89,
"grad_norm": 0.1558333784341812,
"learning_rate": 3.194213939319646e-07,
"loss": 0.5259,
"step": 4127
},
{
"epoch": 0.89,
"grad_norm": 0.1418876200914383,
"learning_rate": 3.18195441885778e-07,
"loss": 0.5342,
"step": 4128
},
{
"epoch": 0.89,
"grad_norm": 0.12174227088689804,
"learning_rate": 3.169717696672936e-07,
"loss": 0.5027,
"step": 4129
},
{
"epoch": 0.89,
"grad_norm": 0.12317800521850586,
"learning_rate": 3.157503778723847e-07,
"loss": 0.5245,
"step": 4130
},
{
"epoch": 0.89,
"grad_norm": 0.14522142708301544,
"learning_rate": 3.145312670958156e-07,
"loss": 0.4962,
"step": 4131
},
{
"epoch": 0.89,
"grad_norm": 0.1550437957048416,
"learning_rate": 3.1331443793123585e-07,
"loss": 0.4934,
"step": 4132
},
{
"epoch": 0.89,
"grad_norm": 0.15259462594985962,
"learning_rate": 3.120998909711881e-07,
"loss": 0.4907,
"step": 4133
},
{
"epoch": 0.89,
"grad_norm": 0.12828658521175385,
"learning_rate": 3.108876268071009e-07,
"loss": 0.4977,
"step": 4134
},
{
"epoch": 0.89,
"grad_norm": 0.18181227147579193,
"learning_rate": 3.096776460292927e-07,
"loss": 0.4883,
"step": 4135
},
{
"epoch": 0.89,
"grad_norm": 0.16961906850337982,
"learning_rate": 3.0846994922697104e-07,
"loss": 0.5045,
"step": 4136
},
{
"epoch": 0.89,
"grad_norm": 0.14249150454998016,
"learning_rate": 3.072645369882271e-07,
"loss": 0.5097,
"step": 4137
},
{
"epoch": 0.89,
"grad_norm": 0.1759713590145111,
"learning_rate": 3.060614099000442e-07,
"loss": 0.4703,
"step": 4138
},
{
"epoch": 0.89,
"grad_norm": 0.1403285712003708,
"learning_rate": 3.048605685482892e-07,
"loss": 0.4779,
"step": 4139
},
{
"epoch": 0.89,
"grad_norm": 0.15652833878993988,
"learning_rate": 3.0366201351771983e-07,
"loss": 0.4843,
"step": 4140
},
{
"epoch": 0.89,
"grad_norm": 0.1837598830461502,
"learning_rate": 3.024657453919777e-07,
"loss": 0.5272,
"step": 4141
},
{
"epoch": 0.89,
"grad_norm": 0.1516779363155365,
"learning_rate": 3.0127176475359065e-07,
"loss": 0.5174,
"step": 4142
},
{
"epoch": 0.89,
"grad_norm": 0.15141400694847107,
"learning_rate": 3.0008007218397415e-07,
"loss": 0.5148,
"step": 4143
},
{
"epoch": 0.89,
"grad_norm": 0.1702878326177597,
"learning_rate": 2.988906682634285e-07,
"loss": 0.5279,
"step": 4144
},
{
"epoch": 0.89,
"grad_norm": 0.17491145431995392,
"learning_rate": 2.977035535711392e-07,
"loss": 0.5319,
"step": 4145
},
{
"epoch": 0.89,
"grad_norm": 0.16550621390342712,
"learning_rate": 2.965187286851784e-07,
"loss": 0.5071,
"step": 4146
},
{
"epoch": 0.89,
"grad_norm": 0.19162628054618835,
"learning_rate": 2.953361941825017e-07,
"loss": 0.5123,
"step": 4147
},
{
"epoch": 0.89,
"grad_norm": 0.15393443405628204,
"learning_rate": 2.941559506389513e-07,
"loss": 0.5035,
"step": 4148
},
{
"epoch": 0.89,
"grad_norm": 0.20023614168167114,
"learning_rate": 2.9297799862925136e-07,
"loss": 0.5585,
"step": 4149
},
{
"epoch": 0.89,
"grad_norm": 0.13044221699237823,
"learning_rate": 2.9180233872701247e-07,
"loss": 0.4811,
"step": 4150
},
{
"epoch": 0.89,
"grad_norm": 0.13851873576641083,
"learning_rate": 2.906289715047267e-07,
"loss": 0.5445,
"step": 4151
},
{
"epoch": 0.89,
"grad_norm": 0.15511353313922882,
"learning_rate": 2.894578975337703e-07,
"loss": 0.5014,
"step": 4152
},
{
"epoch": 0.89,
"grad_norm": 0.12846341729164124,
"learning_rate": 2.8828911738440713e-07,
"loss": 0.4618,
"step": 4153
},
{
"epoch": 0.89,
"grad_norm": 0.19906170666217804,
"learning_rate": 2.8712263162577636e-07,
"loss": 0.5116,
"step": 4154
},
{
"epoch": 0.9,
"grad_norm": 0.16838042438030243,
"learning_rate": 2.8595844082590695e-07,
"loss": 0.4673,
"step": 4155
},
{
"epoch": 0.9,
"grad_norm": 0.14445045590400696,
"learning_rate": 2.8479654555170546e-07,
"loss": 0.5171,
"step": 4156
},
{
"epoch": 0.9,
"grad_norm": 0.14654265344142914,
"learning_rate": 2.836369463689631e-07,
"loss": 0.5053,
"step": 4157
},
{
"epoch": 0.9,
"grad_norm": 0.14703992009162903,
"learning_rate": 2.8247964384235214e-07,
"loss": 0.5053,
"step": 4158
},
{
"epoch": 0.9,
"grad_norm": 0.15812741219997406,
"learning_rate": 2.813246385354268e-07,
"loss": 0.5083,
"step": 4159
},
{
"epoch": 0.9,
"grad_norm": 0.18599557876586914,
"learning_rate": 2.8017193101062377e-07,
"loss": 0.5569,
"step": 4160
},
{
"epoch": 0.9,
"grad_norm": 0.16005754470825195,
"learning_rate": 2.7902152182925746e-07,
"loss": 0.5608,
"step": 4161
},
{
"epoch": 0.9,
"grad_norm": 0.1266726851463318,
"learning_rate": 2.778734115515269e-07,
"loss": 0.536,
"step": 4162
},
{
"epoch": 0.9,
"grad_norm": 0.15541070699691772,
"learning_rate": 2.7672760073650996e-07,
"loss": 0.5001,
"step": 4163
},
{
"epoch": 0.9,
"grad_norm": 0.12981733679771423,
"learning_rate": 2.755840899421636e-07,
"loss": 0.5071,
"step": 4164
},
{
"epoch": 0.9,
"grad_norm": 0.14456294476985931,
"learning_rate": 2.744428797253268e-07,
"loss": 0.4775,
"step": 4165
},
{
"epoch": 0.9,
"grad_norm": 0.16282424330711365,
"learning_rate": 2.7330397064171787e-07,
"loss": 0.5036,
"step": 4166
},
{
"epoch": 0.9,
"grad_norm": 0.13770410418510437,
"learning_rate": 2.7216736324593316e-07,
"loss": 0.4779,
"step": 4167
},
{
"epoch": 0.9,
"grad_norm": 0.1373995691537857,
"learning_rate": 2.7103305809145106e-07,
"loss": 0.5212,
"step": 4168
},
{
"epoch": 0.9,
"grad_norm": 0.13789519667625427,
"learning_rate": 2.699010557306253e-07,
"loss": 0.5334,
"step": 4169
},
{
"epoch": 0.9,
"grad_norm": 0.15480732917785645,
"learning_rate": 2.687713567146899e-07,
"loss": 0.5061,
"step": 4170
},
{
"epoch": 0.9,
"grad_norm": 0.1765686720609665,
"learning_rate": 2.676439615937582e-07,
"loss": 0.4803,
"step": 4171
},
{
"epoch": 0.9,
"grad_norm": 0.13667207956314087,
"learning_rate": 2.665188709168215e-07,
"loss": 0.529,
"step": 4172
},
{
"epoch": 0.9,
"grad_norm": 0.1658671498298645,
"learning_rate": 2.6539608523174665e-07,
"loss": 0.5231,
"step": 4173
},
{
"epoch": 0.9,
"grad_norm": 0.1876867711544037,
"learning_rate": 2.642756050852796e-07,
"loss": 0.5373,
"step": 4174
},
{
"epoch": 0.9,
"grad_norm": 0.1558872014284134,
"learning_rate": 2.631574310230456e-07,
"loss": 0.5224,
"step": 4175
},
{
"epoch": 0.9,
"grad_norm": 0.15014420449733734,
"learning_rate": 2.620415635895429e-07,
"loss": 0.5415,
"step": 4176
},
{
"epoch": 0.9,
"grad_norm": 0.14560414850711823,
"learning_rate": 2.6092800332814914e-07,
"loss": 0.4483,
"step": 4177
},
{
"epoch": 0.9,
"grad_norm": 0.19036678969860077,
"learning_rate": 2.5981675078111835e-07,
"loss": 0.545,
"step": 4178
},
{
"epoch": 0.9,
"grad_norm": 0.3053010404109955,
"learning_rate": 2.587078064895804e-07,
"loss": 0.5174,
"step": 4179
},
{
"epoch": 0.9,
"grad_norm": 0.14357438683509827,
"learning_rate": 2.5760117099354163e-07,
"loss": 0.484,
"step": 4180
},
{
"epoch": 0.9,
"grad_norm": 0.1921653300523758,
"learning_rate": 2.5649684483188274e-07,
"loss": 0.5016,
"step": 4181
},
{
"epoch": 0.9,
"grad_norm": 0.15328127145767212,
"learning_rate": 2.5539482854236076e-07,
"loss": 0.4675,
"step": 4182
},
{
"epoch": 0.9,
"grad_norm": 0.16819103062152863,
"learning_rate": 2.5429512266160805e-07,
"loss": 0.4982,
"step": 4183
},
{
"epoch": 0.9,
"grad_norm": 0.16778436303138733,
"learning_rate": 2.531977277251324e-07,
"loss": 0.5475,
"step": 4184
},
{
"epoch": 0.9,
"grad_norm": 0.1751207560300827,
"learning_rate": 2.521026442673158e-07,
"loss": 0.5167,
"step": 4185
},
{
"epoch": 0.9,
"grad_norm": 0.16310207545757294,
"learning_rate": 2.510098728214133e-07,
"loss": 0.5168,
"step": 4186
},
{
"epoch": 0.9,
"grad_norm": 0.17500171065330505,
"learning_rate": 2.4991941391955654e-07,
"loss": 0.5662,
"step": 4187
},
{
"epoch": 0.9,
"grad_norm": 0.15705259144306183,
"learning_rate": 2.488312680927485e-07,
"loss": 0.5006,
"step": 4188
},
{
"epoch": 0.9,
"grad_norm": 0.18603339791297913,
"learning_rate": 2.4774543587086807e-07,
"loss": 0.5122,
"step": 4189
},
{
"epoch": 0.9,
"grad_norm": 0.18599247932434082,
"learning_rate": 2.466619177826668e-07,
"loss": 0.5189,
"step": 4190
},
{
"epoch": 0.9,
"grad_norm": 0.1684497594833374,
"learning_rate": 2.4558071435576813e-07,
"loss": 0.5395,
"step": 4191
},
{
"epoch": 0.9,
"grad_norm": 0.15068615972995758,
"learning_rate": 2.4450182611667096e-07,
"loss": 0.4888,
"step": 4192
},
{
"epoch": 0.9,
"grad_norm": 0.1370285153388977,
"learning_rate": 2.4342525359074385e-07,
"loss": 0.5118,
"step": 4193
},
{
"epoch": 0.9,
"grad_norm": 0.16400447487831116,
"learning_rate": 2.423509973022292e-07,
"loss": 0.4971,
"step": 4194
},
{
"epoch": 0.9,
"grad_norm": 0.22781065106391907,
"learning_rate": 2.4127905777424134e-07,
"loss": 0.5495,
"step": 4195
},
{
"epoch": 0.9,
"grad_norm": 0.1345801055431366,
"learning_rate": 2.4020943552876706e-07,
"loss": 0.509,
"step": 4196
},
{
"epoch": 0.9,
"grad_norm": 0.15502989292144775,
"learning_rate": 2.391421310866648e-07,
"loss": 0.5564,
"step": 4197
},
{
"epoch": 0.9,
"grad_norm": 0.13414627313613892,
"learning_rate": 2.3807714496766165e-07,
"loss": 0.5253,
"step": 4198
},
{
"epoch": 0.9,
"grad_norm": 0.1705794483423233,
"learning_rate": 2.370144776903599e-07,
"loss": 0.4849,
"step": 4199
},
{
"epoch": 0.9,
"grad_norm": 0.16182225942611694,
"learning_rate": 2.3595412977222897e-07,
"loss": 0.5487,
"step": 4200
},
{
"epoch": 0.9,
"grad_norm": 0.18094182014465332,
"learning_rate": 2.3489610172961143e-07,
"loss": 0.4966,
"step": 4201
},
{
"epoch": 0.91,
"grad_norm": 0.134856179356575,
"learning_rate": 2.3384039407771896e-07,
"loss": 0.5284,
"step": 4202
},
{
"epoch": 0.91,
"grad_norm": 0.12742473185062408,
"learning_rate": 2.327870073306332e-07,
"loss": 0.5371,
"step": 4203
},
{
"epoch": 0.91,
"grad_norm": 0.16482314467430115,
"learning_rate": 2.317359420013071e-07,
"loss": 0.5241,
"step": 4204
},
{
"epoch": 0.91,
"grad_norm": 0.17178313434123993,
"learning_rate": 2.306871986015613e-07,
"loss": 0.5146,
"step": 4205
},
{
"epoch": 0.91,
"grad_norm": 0.16056092083454132,
"learning_rate": 2.2964077764208615e-07,
"loss": 0.5389,
"step": 4206
},
{
"epoch": 0.91,
"grad_norm": 0.18820203840732574,
"learning_rate": 2.2859667963244236e-07,
"loss": 0.4964,
"step": 4207
},
{
"epoch": 0.91,
"grad_norm": 0.18173396587371826,
"learning_rate": 2.2755490508105716e-07,
"loss": 0.5323,
"step": 4208
},
{
"epoch": 0.91,
"grad_norm": 0.15220309793949127,
"learning_rate": 2.2651545449522972e-07,
"loss": 0.477,
"step": 4209
},
{
"epoch": 0.91,
"grad_norm": 0.17373429238796234,
"learning_rate": 2.254783283811246e-07,
"loss": 0.5649,
"step": 4210
},
{
"epoch": 0.91,
"grad_norm": 0.1504889577627182,
"learning_rate": 2.2444352724377505e-07,
"loss": 0.5183,
"step": 4211
},
{
"epoch": 0.91,
"grad_norm": 0.1400587409734726,
"learning_rate": 2.2341105158708408e-07,
"loss": 0.5101,
"step": 4212
},
{
"epoch": 0.91,
"grad_norm": 0.18521972000598907,
"learning_rate": 2.22380901913819e-07,
"loss": 0.4801,
"step": 4213
},
{
"epoch": 0.91,
"grad_norm": 0.1672522872686386,
"learning_rate": 2.2135307872561628e-07,
"loss": 0.4725,
"step": 4214
},
{
"epoch": 0.91,
"grad_norm": 0.15692496299743652,
"learning_rate": 2.2032758252298115e-07,
"loss": 0.5603,
"step": 4215
},
{
"epoch": 0.91,
"grad_norm": 0.18013040721416473,
"learning_rate": 2.1930441380528243e-07,
"loss": 0.5292,
"step": 4216
},
{
"epoch": 0.91,
"grad_norm": 0.14542804658412933,
"learning_rate": 2.182835730707583e-07,
"loss": 0.5056,
"step": 4217
},
{
"epoch": 0.91,
"grad_norm": 0.16739703714847565,
"learning_rate": 2.172650608165111e-07,
"loss": 0.4897,
"step": 4218
},
{
"epoch": 0.91,
"grad_norm": 0.16817772388458252,
"learning_rate": 2.1624887753851186e-07,
"loss": 0.515,
"step": 4219
},
{
"epoch": 0.91,
"grad_norm": 0.1342426985502243,
"learning_rate": 2.1523502373159367e-07,
"loss": 0.4996,
"step": 4220
},
{
"epoch": 0.91,
"grad_norm": 0.3892795741558075,
"learning_rate": 2.142234998894588e-07,
"loss": 0.4838,
"step": 4221
},
{
"epoch": 0.91,
"grad_norm": 0.16268621385097504,
"learning_rate": 2.1321430650467546e-07,
"loss": 0.5302,
"step": 4222
},
{
"epoch": 0.91,
"grad_norm": 0.12644894421100616,
"learning_rate": 2.1220744406867278e-07,
"loss": 0.5567,
"step": 4223
},
{
"epoch": 0.91,
"grad_norm": 0.1844691038131714,
"learning_rate": 2.112029130717491e-07,
"loss": 0.6264,
"step": 4224
},
{
"epoch": 0.91,
"grad_norm": 0.18971168994903564,
"learning_rate": 2.1020071400306429e-07,
"loss": 0.5327,
"step": 4225
},
{
"epoch": 0.91,
"grad_norm": 0.11999719589948654,
"learning_rate": 2.092008473506446e-07,
"loss": 0.5153,
"step": 4226
},
{
"epoch": 0.91,
"grad_norm": 0.12612876296043396,
"learning_rate": 2.0820331360138058e-07,
"loss": 0.4838,
"step": 4227
},
{
"epoch": 0.91,
"grad_norm": 0.15707595646381378,
"learning_rate": 2.072081132410253e-07,
"loss": 0.5158,
"step": 4228
},
{
"epoch": 0.91,
"grad_norm": 0.14865291118621826,
"learning_rate": 2.062152467541978e-07,
"loss": 0.512,
"step": 4229
},
{
"epoch": 0.91,
"grad_norm": 0.17846401035785675,
"learning_rate": 2.0522471462437798e-07,
"loss": 0.4865,
"step": 4230
},
{
"epoch": 0.91,
"grad_norm": 0.15374383330345154,
"learning_rate": 2.042365173339117e-07,
"loss": 0.5007,
"step": 4231
},
{
"epoch": 0.91,
"grad_norm": 0.14291195571422577,
"learning_rate": 2.0325065536400456e-07,
"loss": 0.5102,
"step": 4232
},
{
"epoch": 0.91,
"grad_norm": 0.12746839225292206,
"learning_rate": 2.02267129194727e-07,
"loss": 0.51,
"step": 4233
},
{
"epoch": 0.91,
"grad_norm": 0.16647961735725403,
"learning_rate": 2.0128593930501427e-07,
"loss": 0.5033,
"step": 4234
},
{
"epoch": 0.91,
"grad_norm": 0.16872666776180267,
"learning_rate": 2.0030708617265971e-07,
"loss": 0.4992,
"step": 4235
},
{
"epoch": 0.91,
"grad_norm": 0.13757802546024323,
"learning_rate": 1.9933057027432147e-07,
"loss": 0.5519,
"step": 4236
},
{
"epoch": 0.91,
"grad_norm": 0.15467625856399536,
"learning_rate": 1.9835639208551803e-07,
"loss": 0.5208,
"step": 4237
},
{
"epoch": 0.91,
"grad_norm": 0.14848686754703522,
"learning_rate": 1.9738455208063055e-07,
"loss": 0.5348,
"step": 4238
},
{
"epoch": 0.91,
"grad_norm": 0.18028004467487335,
"learning_rate": 1.9641505073290103e-07,
"loss": 0.5313,
"step": 4239
},
{
"epoch": 0.91,
"grad_norm": 0.19385437667369843,
"learning_rate": 1.9544788851443342e-07,
"loss": 0.5109,
"step": 4240
},
{
"epoch": 0.91,
"grad_norm": 0.1776755303144455,
"learning_rate": 1.944830658961927e-07,
"loss": 0.4881,
"step": 4241
},
{
"epoch": 0.91,
"grad_norm": 0.15795911848545074,
"learning_rate": 1.9352058334800195e-07,
"loss": 0.5299,
"step": 4242
},
{
"epoch": 0.91,
"grad_norm": 0.1372847557067871,
"learning_rate": 1.9256044133854846e-07,
"loss": 0.5026,
"step": 4243
},
{
"epoch": 0.91,
"grad_norm": 0.1478043794631958,
"learning_rate": 1.9160264033537824e-07,
"loss": 0.4663,
"step": 4244
},
{
"epoch": 0.91,
"grad_norm": 0.16185085475444794,
"learning_rate": 1.9064718080489596e-07,
"loss": 0.4501,
"step": 4245
},
{
"epoch": 0.91,
"grad_norm": 0.15890911221504211,
"learning_rate": 1.8969406321236727e-07,
"loss": 0.5688,
"step": 4246
},
{
"epoch": 0.91,
"grad_norm": 0.1747117042541504,
"learning_rate": 1.8874328802191867e-07,
"loss": 0.5213,
"step": 4247
},
{
"epoch": 0.92,
"grad_norm": 0.15179674327373505,
"learning_rate": 1.8779485569653422e-07,
"loss": 0.5252,
"step": 4248
},
{
"epoch": 0.92,
"grad_norm": 0.1634942591190338,
"learning_rate": 1.868487666980584e-07,
"loss": 0.4914,
"step": 4249
},
{
"epoch": 0.92,
"grad_norm": 0.13174600899219513,
"learning_rate": 1.859050214871927e-07,
"loss": 0.5337,
"step": 4250
},
{
"epoch": 0.92,
"grad_norm": 0.1955437809228897,
"learning_rate": 1.8496362052349893e-07,
"loss": 0.4623,
"step": 4251
},
{
"epoch": 0.92,
"grad_norm": 0.14583423733711243,
"learning_rate": 1.8402456426539706e-07,
"loss": 0.5413,
"step": 4252
},
{
"epoch": 0.92,
"grad_norm": 0.15666338801383972,
"learning_rate": 1.830878531701652e-07,
"loss": 0.4953,
"step": 4253
},
{
"epoch": 0.92,
"grad_norm": 0.15777826309204102,
"learning_rate": 1.8215348769393904e-07,
"loss": 0.5767,
"step": 4254
},
{
"epoch": 0.92,
"grad_norm": 0.1892169713973999,
"learning_rate": 1.8122146829171294e-07,
"loss": 0.5119,
"step": 4255
},
{
"epoch": 0.92,
"grad_norm": 0.13005930185317993,
"learning_rate": 1.8029179541733833e-07,
"loss": 0.5126,
"step": 4256
},
{
"epoch": 0.92,
"grad_norm": 0.15139774978160858,
"learning_rate": 1.7936446952352303e-07,
"loss": 0.5505,
"step": 4257
},
{
"epoch": 0.92,
"grad_norm": 0.18456581234931946,
"learning_rate": 1.7843949106183368e-07,
"loss": 0.4961,
"step": 4258
},
{
"epoch": 0.92,
"grad_norm": 0.1589013636112213,
"learning_rate": 1.7751686048269322e-07,
"loss": 0.5622,
"step": 4259
},
{
"epoch": 0.92,
"grad_norm": 0.16003479063510895,
"learning_rate": 1.7659657823538067e-07,
"loss": 0.5282,
"step": 4260
},
{
"epoch": 0.92,
"grad_norm": 0.15125080943107605,
"learning_rate": 1.7567864476803254e-07,
"loss": 0.4712,
"step": 4261
},
{
"epoch": 0.92,
"grad_norm": 0.14712797105312347,
"learning_rate": 1.747630605276407e-07,
"loss": 0.5371,
"step": 4262
},
{
"epoch": 0.92,
"grad_norm": 0.16056658327579498,
"learning_rate": 1.7384982596005352e-07,
"loss": 0.5107,
"step": 4263
},
{
"epoch": 0.92,
"grad_norm": 0.14780429005622864,
"learning_rate": 1.7293894150997414e-07,
"loss": 0.5311,
"step": 4264
},
{
"epoch": 0.92,
"grad_norm": 0.14676974713802338,
"learning_rate": 1.720304076209639e-07,
"loss": 0.4981,
"step": 4265
},
{
"epoch": 0.92,
"grad_norm": 0.14568917453289032,
"learning_rate": 1.711242247354372e-07,
"loss": 0.5237,
"step": 4266
},
{
"epoch": 0.92,
"grad_norm": 0.13407346606254578,
"learning_rate": 1.7022039329466333e-07,
"loss": 0.5197,
"step": 4267
},
{
"epoch": 0.92,
"grad_norm": 0.14667077362537384,
"learning_rate": 1.6931891373876852e-07,
"loss": 0.5155,
"step": 4268
},
{
"epoch": 0.92,
"grad_norm": 0.14774075150489807,
"learning_rate": 1.6841978650673218e-07,
"loss": 0.5261,
"step": 4269
},
{
"epoch": 0.92,
"grad_norm": 0.13004808127880096,
"learning_rate": 1.6752301203638854e-07,
"loss": 0.4793,
"step": 4270
},
{
"epoch": 0.92,
"grad_norm": 0.1556776911020279,
"learning_rate": 1.666285907644266e-07,
"loss": 0.5356,
"step": 4271
},
{
"epoch": 0.92,
"grad_norm": 0.1795538365840912,
"learning_rate": 1.657365231263891e-07,
"loss": 0.5119,
"step": 4272
},
{
"epoch": 0.92,
"grad_norm": 0.15904632210731506,
"learning_rate": 1.6484680955667354e-07,
"loss": 0.5485,
"step": 4273
},
{
"epoch": 0.92,
"grad_norm": 0.1445087045431137,
"learning_rate": 1.6395945048852947e-07,
"loss": 0.4932,
"step": 4274
},
{
"epoch": 0.92,
"grad_norm": 0.13816164433956146,
"learning_rate": 1.6307444635406011e-07,
"loss": 0.5038,
"step": 4275
},
{
"epoch": 0.92,
"grad_norm": 0.15279729664325714,
"learning_rate": 1.6219179758422465e-07,
"loss": 0.5235,
"step": 4276
},
{
"epoch": 0.92,
"grad_norm": 0.15122798085212708,
"learning_rate": 1.6131150460883038e-07,
"loss": 0.4975,
"step": 4277
},
{
"epoch": 0.92,
"grad_norm": 0.19103887677192688,
"learning_rate": 1.6043356785654273e-07,
"loss": 0.5026,
"step": 4278
},
{
"epoch": 0.92,
"grad_norm": 0.1535024344921112,
"learning_rate": 1.595579877548764e-07,
"loss": 0.5348,
"step": 4279
},
{
"epoch": 0.92,
"grad_norm": 0.17013922333717346,
"learning_rate": 1.5868476473019922e-07,
"loss": 0.528,
"step": 4280
},
{
"epoch": 0.92,
"grad_norm": 0.13540351390838623,
"learning_rate": 1.578138992077316e-07,
"loss": 0.5253,
"step": 4281
},
{
"epoch": 0.92,
"grad_norm": 0.14699843525886536,
"learning_rate": 1.5694539161154598e-07,
"loss": 0.4991,
"step": 4282
},
{
"epoch": 0.92,
"grad_norm": 0.1623685657978058,
"learning_rate": 1.560792423645663e-07,
"loss": 0.5254,
"step": 4283
},
{
"epoch": 0.92,
"grad_norm": 0.17117798328399658,
"learning_rate": 1.5521545188856734e-07,
"loss": 0.557,
"step": 4284
},
{
"epoch": 0.92,
"grad_norm": 0.16229890286922455,
"learning_rate": 1.5435402060417825e-07,
"loss": 0.5552,
"step": 4285
},
{
"epoch": 0.92,
"grad_norm": 0.28365910053253174,
"learning_rate": 1.5349494893087514e-07,
"loss": 0.5357,
"step": 4286
},
{
"epoch": 0.92,
"grad_norm": 0.1524672657251358,
"learning_rate": 1.526382372869878e-07,
"loss": 0.5343,
"step": 4287
},
{
"epoch": 0.92,
"grad_norm": 0.18612819910049438,
"learning_rate": 1.517838860896964e-07,
"loss": 0.4767,
"step": 4288
},
{
"epoch": 0.92,
"grad_norm": 0.15579423308372498,
"learning_rate": 1.50931895755031e-07,
"loss": 0.5174,
"step": 4289
},
{
"epoch": 0.92,
"grad_norm": 0.1574939489364624,
"learning_rate": 1.500822666978735e-07,
"loss": 0.4945,
"step": 4290
},
{
"epoch": 0.92,
"grad_norm": 0.13923248648643494,
"learning_rate": 1.492349993319536e-07,
"loss": 0.5056,
"step": 4291
},
{
"epoch": 0.92,
"grad_norm": 0.1429956555366516,
"learning_rate": 1.4839009406985295e-07,
"loss": 0.4775,
"step": 4292
},
{
"epoch": 0.92,
"grad_norm": 0.1344211846590042,
"learning_rate": 1.4754755132300292e-07,
"loss": 0.5308,
"step": 4293
},
{
"epoch": 0.92,
"grad_norm": 0.17861835658550262,
"learning_rate": 1.4670737150168257e-07,
"loss": 0.4766,
"step": 4294
},
{
"epoch": 0.93,
"grad_norm": 0.1777002215385437,
"learning_rate": 1.4586955501502186e-07,
"loss": 0.5361,
"step": 4295
},
{
"epoch": 0.93,
"grad_norm": 0.14904451370239258,
"learning_rate": 1.4503410227100057e-07,
"loss": 0.4903,
"step": 4296
},
{
"epoch": 0.93,
"grad_norm": 0.19658173620700836,
"learning_rate": 1.4420101367644602e-07,
"loss": 0.5013,
"step": 4297
},
{
"epoch": 0.93,
"grad_norm": 0.12814252078533173,
"learning_rate": 1.433702896370348e-07,
"loss": 0.5173,
"step": 4298
},
{
"epoch": 0.93,
"grad_norm": 0.1587502658367157,
"learning_rate": 1.4254193055729171e-07,
"loss": 0.5192,
"step": 4299
},
{
"epoch": 0.93,
"grad_norm": 0.16808383166790009,
"learning_rate": 1.417159368405907e-07,
"loss": 0.54,
"step": 4300
},
{
"epoch": 0.93,
"grad_norm": 0.14128008484840393,
"learning_rate": 1.408923088891534e-07,
"loss": 0.5069,
"step": 4301
},
{
"epoch": 0.93,
"grad_norm": 0.1637185662984848,
"learning_rate": 1.4007104710404838e-07,
"loss": 0.514,
"step": 4302
},
{
"epoch": 0.93,
"grad_norm": 0.1476011574268341,
"learning_rate": 1.3925215188519525e-07,
"loss": 0.5337,
"step": 4303
},
{
"epoch": 0.93,
"grad_norm": 0.1344112902879715,
"learning_rate": 1.384356236313572e-07,
"loss": 0.4939,
"step": 4304
},
{
"epoch": 0.93,
"grad_norm": 0.15150727331638336,
"learning_rate": 1.3762146274014842e-07,
"loss": 0.4818,
"step": 4305
},
{
"epoch": 0.93,
"grad_norm": 0.14989051222801208,
"learning_rate": 1.3680966960802623e-07,
"loss": 0.4746,
"step": 4306
},
{
"epoch": 0.93,
"grad_norm": 0.14494554698467255,
"learning_rate": 1.3600024463029938e-07,
"loss": 0.5037,
"step": 4307
},
{
"epoch": 0.93,
"grad_norm": 0.17142927646636963,
"learning_rate": 1.3519318820111983e-07,
"loss": 0.5133,
"step": 4308
},
{
"epoch": 0.93,
"grad_norm": 0.16990455985069275,
"learning_rate": 1.3438850071348874e-07,
"loss": 0.5251,
"step": 4309
},
{
"epoch": 0.93,
"grad_norm": 0.1605384796857834,
"learning_rate": 1.3358618255925214e-07,
"loss": 0.5038,
"step": 4310
},
{
"epoch": 0.93,
"grad_norm": 0.13191020488739014,
"learning_rate": 1.3278623412910308e-07,
"loss": 0.5257,
"step": 4311
},
{
"epoch": 0.93,
"grad_norm": 0.1355755478143692,
"learning_rate": 1.3198865581258046e-07,
"loss": 0.5244,
"step": 4312
},
{
"epoch": 0.93,
"grad_norm": 0.1625167280435562,
"learning_rate": 1.311934479980681e-07,
"loss": 0.4965,
"step": 4313
},
{
"epoch": 0.93,
"grad_norm": 0.18114399909973145,
"learning_rate": 1.3040061107279679e-07,
"loss": 0.5235,
"step": 4314
},
{
"epoch": 0.93,
"grad_norm": 0.15504209697246552,
"learning_rate": 1.2961014542284266e-07,
"loss": 0.5038,
"step": 4315
},
{
"epoch": 0.93,
"grad_norm": 0.20267391204833984,
"learning_rate": 1.2882205143312676e-07,
"loss": 0.4623,
"step": 4316
},
{
"epoch": 0.93,
"grad_norm": 0.1550229787826538,
"learning_rate": 1.280363294874154e-07,
"loss": 0.4784,
"step": 4317
},
{
"epoch": 0.93,
"grad_norm": 0.1660616248846054,
"learning_rate": 1.272529799683192e-07,
"loss": 0.4987,
"step": 4318
},
{
"epoch": 0.93,
"grad_norm": 0.15414029359817505,
"learning_rate": 1.264720032572947e-07,
"loss": 0.56,
"step": 4319
},
{
"epoch": 0.93,
"grad_norm": 0.18424440920352936,
"learning_rate": 1.2569339973464155e-07,
"loss": 0.4993,
"step": 4320
},
{
"epoch": 0.93,
"grad_norm": 0.1249246671795845,
"learning_rate": 1.249171697795054e-07,
"loss": 0.486,
"step": 4321
},
{
"epoch": 0.93,
"grad_norm": 0.15937843918800354,
"learning_rate": 1.2414331376987555e-07,
"loss": 0.5439,
"step": 4322
},
{
"epoch": 0.93,
"grad_norm": 0.16112691164016724,
"learning_rate": 1.233718320825833e-07,
"loss": 0.4971,
"step": 4323
},
{
"epoch": 0.93,
"grad_norm": 0.13961079716682434,
"learning_rate": 1.2260272509330707e-07,
"loss": 0.5513,
"step": 4324
},
{
"epoch": 0.93,
"grad_norm": 0.1391015648841858,
"learning_rate": 1.218359931765667e-07,
"loss": 0.5472,
"step": 4325
},
{
"epoch": 0.93,
"grad_norm": 0.1630607694387436,
"learning_rate": 1.2107163670572574e-07,
"loss": 0.5002,
"step": 4326
},
{
"epoch": 0.93,
"grad_norm": 0.16287516057491302,
"learning_rate": 1.2030965605299204e-07,
"loss": 0.4701,
"step": 4327
},
{
"epoch": 0.93,
"grad_norm": 0.12734615802764893,
"learning_rate": 1.195500515894149e-07,
"loss": 0.5591,
"step": 4328
},
{
"epoch": 0.93,
"grad_norm": 0.16435910761356354,
"learning_rate": 1.1879282368488787e-07,
"loss": 0.5503,
"step": 4329
},
{
"epoch": 0.93,
"grad_norm": 0.16866935789585114,
"learning_rate": 1.1803797270814765e-07,
"loss": 0.518,
"step": 4330
},
{
"epoch": 0.93,
"grad_norm": 0.17033065855503082,
"learning_rate": 1.1728549902677133e-07,
"loss": 0.4658,
"step": 4331
},
{
"epoch": 0.93,
"grad_norm": 0.18168850243091583,
"learning_rate": 1.165354030071808e-07,
"loss": 0.5154,
"step": 4332
},
{
"epoch": 0.93,
"grad_norm": 0.15495000779628754,
"learning_rate": 1.1578768501463722e-07,
"loss": 0.5399,
"step": 4333
},
{
"epoch": 0.93,
"grad_norm": 0.14426656067371368,
"learning_rate": 1.1504234541324765e-07,
"loss": 0.4739,
"step": 4334
},
{
"epoch": 0.93,
"grad_norm": 0.1701272577047348,
"learning_rate": 1.1429938456595735e-07,
"loss": 0.5633,
"step": 4335
},
{
"epoch": 0.93,
"grad_norm": 0.16904759407043457,
"learning_rate": 1.1355880283455523e-07,
"loss": 0.528,
"step": 4336
},
{
"epoch": 0.93,
"grad_norm": 0.1610129028558731,
"learning_rate": 1.1282060057967226e-07,
"loss": 0.5077,
"step": 4337
},
{
"epoch": 0.93,
"grad_norm": 0.1449388712644577,
"learning_rate": 1.1208477816077756e-07,
"loss": 0.5261,
"step": 4338
},
{
"epoch": 0.93,
"grad_norm": 0.17686261236667633,
"learning_rate": 1.1135133593618508e-07,
"loss": 0.5136,
"step": 4339
},
{
"epoch": 0.93,
"grad_norm": 0.13631290197372437,
"learning_rate": 1.1062027426304744e-07,
"loss": 0.5105,
"step": 4340
},
{
"epoch": 0.94,
"grad_norm": 0.15161027014255524,
"learning_rate": 1.0989159349735879e-07,
"loss": 0.5221,
"step": 4341
},
{
"epoch": 0.94,
"grad_norm": 0.15384641289710999,
"learning_rate": 1.091652939939547e-07,
"loss": 0.5192,
"step": 4342
},
{
"epoch": 0.94,
"grad_norm": 0.166702538728714,
"learning_rate": 1.084413761065084e-07,
"loss": 0.5481,
"step": 4343
},
{
"epoch": 0.94,
"grad_norm": 0.15912270545959473,
"learning_rate": 1.0771984018753733e-07,
"loss": 0.6039,
"step": 4344
},
{
"epoch": 0.94,
"grad_norm": 0.15669448673725128,
"learning_rate": 1.0700068658839491e-07,
"loss": 0.5047,
"step": 4345
},
{
"epoch": 0.94,
"grad_norm": 0.16294890642166138,
"learning_rate": 1.0628391565927765e-07,
"loss": 0.5736,
"step": 4346
},
{
"epoch": 0.94,
"grad_norm": 0.18943636119365692,
"learning_rate": 1.0556952774922136e-07,
"loss": 0.5612,
"step": 4347
},
{
"epoch": 0.94,
"grad_norm": 0.1898173987865448,
"learning_rate": 1.0485752320609944e-07,
"loss": 0.5456,
"step": 4348
},
{
"epoch": 0.94,
"grad_norm": 0.13543102145195007,
"learning_rate": 1.0414790237662676e-07,
"loss": 0.5888,
"step": 4349
},
{
"epoch": 0.94,
"grad_norm": 0.1901504397392273,
"learning_rate": 1.0344066560635635e-07,
"loss": 0.5364,
"step": 4350
},
{
"epoch": 0.94,
"grad_norm": 0.16581448912620544,
"learning_rate": 1.0273581323968052e-07,
"loss": 0.4779,
"step": 4351
},
{
"epoch": 0.94,
"grad_norm": 0.16107046604156494,
"learning_rate": 1.0203334561983025e-07,
"loss": 0.5074,
"step": 4352
},
{
"epoch": 0.94,
"grad_norm": 0.15327927470207214,
"learning_rate": 1.0133326308887692e-07,
"loss": 0.5471,
"step": 4353
},
{
"epoch": 0.94,
"grad_norm": 0.1985284835100174,
"learning_rate": 1.0063556598772839e-07,
"loss": 0.5462,
"step": 4354
},
{
"epoch": 0.94,
"grad_norm": 0.13533158600330353,
"learning_rate": 9.994025465613122e-08,
"loss": 0.5763,
"step": 4355
},
{
"epoch": 0.94,
"grad_norm": 0.19730281829833984,
"learning_rate": 9.924732943267068e-08,
"loss": 0.535,
"step": 4356
},
{
"epoch": 0.94,
"grad_norm": 0.18454429507255554,
"learning_rate": 9.855679065477131e-08,
"loss": 0.5222,
"step": 4357
},
{
"epoch": 0.94,
"grad_norm": 0.15890662372112274,
"learning_rate": 9.7868638658693e-08,
"loss": 0.4811,
"step": 4358
},
{
"epoch": 0.94,
"grad_norm": 0.181091770529747,
"learning_rate": 9.71828737795355e-08,
"loss": 0.5643,
"step": 4359
},
{
"epoch": 0.94,
"grad_norm": 0.13532613217830658,
"learning_rate": 9.6499496351235e-08,
"loss": 0.5115,
"step": 4360
},
{
"epoch": 0.94,
"grad_norm": 0.15786287188529968,
"learning_rate": 9.581850670656644e-08,
"loss": 0.5078,
"step": 4361
},
{
"epoch": 0.94,
"grad_norm": 0.1745007038116455,
"learning_rate": 9.513990517713955e-08,
"loss": 0.5805,
"step": 4362
},
{
"epoch": 0.94,
"grad_norm": 0.15297739207744598,
"learning_rate": 9.446369209340334e-08,
"loss": 0.4882,
"step": 4363
},
{
"epoch": 0.94,
"grad_norm": 0.1355600208044052,
"learning_rate": 9.378986778464327e-08,
"loss": 0.4854,
"step": 4364
},
{
"epoch": 0.94,
"grad_norm": 0.1561882495880127,
"learning_rate": 9.311843257898134e-08,
"loss": 0.491,
"step": 4365
},
{
"epoch": 0.94,
"grad_norm": 0.17752040922641754,
"learning_rate": 9.244938680337656e-08,
"loss": 0.5178,
"step": 4366
},
{
"epoch": 0.94,
"grad_norm": 0.12778738141059875,
"learning_rate": 9.178273078362332e-08,
"loss": 0.5,
"step": 4367
},
{
"epoch": 0.94,
"grad_norm": 0.1494607776403427,
"learning_rate": 9.111846484435361e-08,
"loss": 0.5469,
"step": 4368
},
{
"epoch": 0.94,
"grad_norm": 0.1332845240831375,
"learning_rate": 9.045658930903477e-08,
"loss": 0.5386,
"step": 4369
},
{
"epoch": 0.94,
"grad_norm": 0.18359340727329254,
"learning_rate": 8.979710449997014e-08,
"loss": 0.5668,
"step": 4370
},
{
"epoch": 0.94,
"grad_norm": 0.16064810752868652,
"learning_rate": 8.914001073829892e-08,
"loss": 0.5341,
"step": 4371
},
{
"epoch": 0.94,
"grad_norm": 0.14224553108215332,
"learning_rate": 8.848530834399683e-08,
"loss": 0.5512,
"step": 4372
},
{
"epoch": 0.94,
"grad_norm": 0.14381971955299377,
"learning_rate": 8.783299763587439e-08,
"loss": 0.5154,
"step": 4373
},
{
"epoch": 0.94,
"grad_norm": 0.1366354078054428,
"learning_rate": 8.718307893157696e-08,
"loss": 0.5354,
"step": 4374
},
{
"epoch": 0.94,
"grad_norm": 0.21582616865634918,
"learning_rate": 8.653555254758583e-08,
"loss": 0.5755,
"step": 4375
},
{
"epoch": 0.94,
"grad_norm": 0.18118129670619965,
"learning_rate": 8.589041879921711e-08,
"loss": 0.5604,
"step": 4376
},
{
"epoch": 0.94,
"grad_norm": 0.18753331899642944,
"learning_rate": 8.524767800062228e-08,
"loss": 0.5141,
"step": 4377
},
{
"epoch": 0.94,
"grad_norm": 0.15496698021888733,
"learning_rate": 8.460733046478653e-08,
"loss": 0.5408,
"step": 4378
},
{
"epoch": 0.94,
"grad_norm": 0.19295796751976013,
"learning_rate": 8.396937650353042e-08,
"loss": 0.5633,
"step": 4379
},
{
"epoch": 0.94,
"grad_norm": 0.16296663880348206,
"learning_rate": 8.333381642750882e-08,
"loss": 0.4816,
"step": 4380
},
{
"epoch": 0.94,
"grad_norm": 0.18352928757667542,
"learning_rate": 8.270065054621135e-08,
"loss": 0.521,
"step": 4381
},
{
"epoch": 0.94,
"grad_norm": 0.1636262685060501,
"learning_rate": 8.206987916796027e-08,
"loss": 0.492,
"step": 4382
},
{
"epoch": 0.94,
"grad_norm": 0.1417970508337021,
"learning_rate": 8.144150259991323e-08,
"loss": 0.4883,
"step": 4383
},
{
"epoch": 0.94,
"grad_norm": 0.15278513729572296,
"learning_rate": 8.081552114806101e-08,
"loss": 0.5371,
"step": 4384
},
{
"epoch": 0.94,
"grad_norm": 0.1443348526954651,
"learning_rate": 8.019193511722922e-08,
"loss": 0.4936,
"step": 4385
},
{
"epoch": 0.94,
"grad_norm": 0.17426589131355286,
"learning_rate": 7.957074481107551e-08,
"loss": 0.5743,
"step": 4386
},
{
"epoch": 0.95,
"grad_norm": 0.1521102637052536,
"learning_rate": 7.895195053209126e-08,
"loss": 0.5066,
"step": 4387
},
{
"epoch": 0.95,
"grad_norm": 0.1313631236553192,
"learning_rate": 7.833555258160208e-08,
"loss": 0.4878,
"step": 4388
},
{
"epoch": 0.95,
"grad_norm": 0.1430417150259018,
"learning_rate": 7.77215512597651e-08,
"loss": 0.5264,
"step": 4389
},
{
"epoch": 0.95,
"grad_norm": 0.1771220713853836,
"learning_rate": 7.710994686557172e-08,
"loss": 0.5333,
"step": 4390
},
{
"epoch": 0.95,
"grad_norm": 0.13800616562366486,
"learning_rate": 7.650073969684646e-08,
"loss": 0.5203,
"step": 4391
},
{
"epoch": 0.95,
"grad_norm": 0.1415596306324005,
"learning_rate": 7.589393005024482e-08,
"loss": 0.5199,
"step": 4392
},
{
"epoch": 0.95,
"grad_norm": 0.1424768567085266,
"learning_rate": 7.528951822125596e-08,
"loss": 0.4892,
"step": 4393
},
{
"epoch": 0.95,
"grad_norm": 0.15463979542255402,
"learning_rate": 7.468750450420114e-08,
"loss": 0.4966,
"step": 4394
},
{
"epoch": 0.95,
"grad_norm": 0.12930360436439514,
"learning_rate": 7.40878891922342e-08,
"loss": 0.525,
"step": 4395
},
{
"epoch": 0.95,
"grad_norm": 0.12379728257656097,
"learning_rate": 7.349067257733989e-08,
"loss": 0.532,
"step": 4396
},
{
"epoch": 0.95,
"grad_norm": 0.15126173198223114,
"learning_rate": 7.289585495033668e-08,
"loss": 0.5074,
"step": 4397
},
{
"epoch": 0.95,
"grad_norm": 0.1340315043926239,
"learning_rate": 7.230343660087402e-08,
"loss": 0.5003,
"step": 4398
},
{
"epoch": 0.95,
"grad_norm": 0.14905254542827606,
"learning_rate": 7.171341781743224e-08,
"loss": 0.5331,
"step": 4399
},
{
"epoch": 0.95,
"grad_norm": 0.13680437207221985,
"learning_rate": 7.11257988873243e-08,
"loss": 0.5186,
"step": 4400
},
{
"epoch": 0.95,
"grad_norm": 0.1248023733496666,
"learning_rate": 7.054058009669407e-08,
"loss": 0.4576,
"step": 4401
},
{
"epoch": 0.95,
"grad_norm": 0.14953729510307312,
"learning_rate": 6.995776173051583e-08,
"loss": 0.4709,
"step": 4402
},
{
"epoch": 0.95,
"grad_norm": 0.139199897646904,
"learning_rate": 6.937734407259756e-08,
"loss": 0.5412,
"step": 4403
},
{
"epoch": 0.95,
"grad_norm": 0.1763693392276764,
"learning_rate": 6.879932740557538e-08,
"loss": 0.5147,
"step": 4404
},
{
"epoch": 0.95,
"grad_norm": 0.17772704362869263,
"learning_rate": 6.822371201091749e-08,
"loss": 0.5658,
"step": 4405
},
{
"epoch": 0.95,
"grad_norm": 0.16532278060913086,
"learning_rate": 6.7650498168923e-08,
"loss": 0.4743,
"step": 4406
},
{
"epoch": 0.95,
"grad_norm": 0.17449362576007843,
"learning_rate": 6.707968615872085e-08,
"loss": 0.5396,
"step": 4407
},
{
"epoch": 0.95,
"grad_norm": 0.18282443284988403,
"learning_rate": 6.651127625827037e-08,
"loss": 0.5423,
"step": 4408
},
{
"epoch": 0.95,
"grad_norm": 0.13201217353343964,
"learning_rate": 6.594526874436236e-08,
"loss": 0.5364,
"step": 4409
},
{
"epoch": 0.95,
"grad_norm": 0.1461392194032669,
"learning_rate": 6.538166389261635e-08,
"loss": 0.5235,
"step": 4410
},
{
"epoch": 0.95,
"grad_norm": 0.15727302432060242,
"learning_rate": 6.482046197748282e-08,
"loss": 0.4949,
"step": 4411
},
{
"epoch": 0.95,
"grad_norm": 0.1405402272939682,
"learning_rate": 6.426166327224148e-08,
"loss": 0.5097,
"step": 4412
},
{
"epoch": 0.95,
"grad_norm": 0.14864054322242737,
"learning_rate": 6.3705268049003e-08,
"loss": 0.5318,
"step": 4413
},
{
"epoch": 0.95,
"grad_norm": 0.13717585802078247,
"learning_rate": 6.315127657870513e-08,
"loss": 0.4753,
"step": 4414
},
{
"epoch": 0.95,
"grad_norm": 0.15826748311519623,
"learning_rate": 6.259968913111869e-08,
"loss": 0.522,
"step": 4415
},
{
"epoch": 0.95,
"grad_norm": 0.1390410214662552,
"learning_rate": 6.205050597483997e-08,
"loss": 0.5485,
"step": 4416
},
{
"epoch": 0.95,
"grad_norm": 0.13676656782627106,
"learning_rate": 6.150372737729781e-08,
"loss": 0.5234,
"step": 4417
},
{
"epoch": 0.95,
"grad_norm": 0.20203281939029694,
"learning_rate": 6.095935360474814e-08,
"loss": 0.5139,
"step": 4418
},
{
"epoch": 0.95,
"grad_norm": 0.16294489800930023,
"learning_rate": 6.041738492227666e-08,
"loss": 0.5323,
"step": 4419
},
{
"epoch": 0.95,
"grad_norm": 0.19889448583126068,
"learning_rate": 5.98778215937973e-08,
"loss": 0.5655,
"step": 4420
},
{
"epoch": 0.95,
"grad_norm": 0.14263413846492767,
"learning_rate": 5.9340663882053727e-08,
"loss": 0.5585,
"step": 4421
},
{
"epoch": 0.95,
"grad_norm": 0.14396370947360992,
"learning_rate": 5.880591204861674e-08,
"loss": 0.5063,
"step": 4422
},
{
"epoch": 0.95,
"grad_norm": 0.1644524484872818,
"learning_rate": 5.827356635388692e-08,
"loss": 0.5243,
"step": 4423
},
{
"epoch": 0.95,
"grad_norm": 0.16655051708221436,
"learning_rate": 5.7743627057092463e-08,
"loss": 0.5033,
"step": 4424
},
{
"epoch": 0.95,
"grad_norm": 0.18211567401885986,
"learning_rate": 5.721609441629028e-08,
"loss": 0.5083,
"step": 4425
},
{
"epoch": 0.95,
"grad_norm": 0.14441342651844025,
"learning_rate": 5.669096868836377e-08,
"loss": 0.4764,
"step": 4426
},
{
"epoch": 0.95,
"grad_norm": 0.14789772033691406,
"learning_rate": 5.616825012902616e-08,
"loss": 0.4914,
"step": 4427
},
{
"epoch": 0.95,
"grad_norm": 0.1412544995546341,
"learning_rate": 5.564793899281884e-08,
"loss": 0.4267,
"step": 4428
},
{
"epoch": 0.95,
"grad_norm": 0.15629424154758453,
"learning_rate": 5.5130035533108587e-08,
"loss": 0.4807,
"step": 4429
},
{
"epoch": 0.95,
"grad_norm": 0.1984405219554901,
"learning_rate": 5.461454000209199e-08,
"loss": 0.5074,
"step": 4430
},
{
"epoch": 0.95,
"grad_norm": 0.15565001964569092,
"learning_rate": 5.410145265079103e-08,
"loss": 0.4852,
"step": 4431
},
{
"epoch": 0.95,
"grad_norm": 0.16649481654167175,
"learning_rate": 5.3590773729056965e-08,
"loss": 0.5007,
"step": 4432
},
{
"epoch": 0.95,
"grad_norm": 0.14113038778305054,
"learning_rate": 5.3082503485566425e-08,
"loss": 0.4873,
"step": 4433
},
{
"epoch": 0.96,
"grad_norm": 0.14539320766925812,
"learning_rate": 5.257664216782532e-08,
"loss": 0.4856,
"step": 4434
},
{
"epoch": 0.96,
"grad_norm": 0.1761976182460785,
"learning_rate": 5.2073190022164933e-08,
"loss": 0.4845,
"step": 4435
},
{
"epoch": 0.96,
"grad_norm": 0.1391577571630478,
"learning_rate": 5.157214729374305e-08,
"loss": 0.4913,
"step": 4436
},
{
"epoch": 0.96,
"grad_norm": 0.16997891664505005,
"learning_rate": 5.107351422654561e-08,
"loss": 0.5419,
"step": 4437
},
{
"epoch": 0.96,
"grad_norm": 0.15367954969406128,
"learning_rate": 5.057729106338505e-08,
"loss": 0.4658,
"step": 4438
},
{
"epoch": 0.96,
"grad_norm": 0.15846063196659088,
"learning_rate": 5.008347804589808e-08,
"loss": 0.5814,
"step": 4439
},
{
"epoch": 0.96,
"grad_norm": 0.18725064396858215,
"learning_rate": 4.959207541455013e-08,
"loss": 0.5488,
"step": 4440
},
{
"epoch": 0.96,
"grad_norm": 0.14484313130378723,
"learning_rate": 4.910308340863201e-08,
"loss": 0.4471,
"step": 4441
},
{
"epoch": 0.96,
"grad_norm": 0.1446012258529663,
"learning_rate": 4.8616502266261026e-08,
"loss": 0.5428,
"step": 4442
},
{
"epoch": 0.96,
"grad_norm": 0.17468306422233582,
"learning_rate": 4.813233222438041e-08,
"loss": 0.5287,
"step": 4443
},
{
"epoch": 0.96,
"grad_norm": 0.14374323189258575,
"learning_rate": 4.765057351875879e-08,
"loss": 0.5374,
"step": 4444
},
{
"epoch": 0.96,
"grad_norm": 0.14365346729755402,
"learning_rate": 4.7171226383990745e-08,
"loss": 0.5042,
"step": 4445
},
{
"epoch": 0.96,
"grad_norm": 0.16741974651813507,
"learning_rate": 4.6694291053496766e-08,
"loss": 0.5172,
"step": 4446
},
{
"epoch": 0.96,
"grad_norm": 0.15114641189575195,
"learning_rate": 4.621976775952386e-08,
"loss": 0.4949,
"step": 4447
},
{
"epoch": 0.96,
"grad_norm": 0.13638369739055634,
"learning_rate": 4.5747656733142184e-08,
"loss": 0.5654,
"step": 4448
},
{
"epoch": 0.96,
"grad_norm": 0.15733817219734192,
"learning_rate": 4.527795820424896e-08,
"loss": 0.5382,
"step": 4449
},
{
"epoch": 0.96,
"grad_norm": 0.18564561009407043,
"learning_rate": 4.481067240156678e-08,
"loss": 0.5269,
"step": 4450
},
{
"epoch": 0.96,
"grad_norm": 0.2260461002588272,
"learning_rate": 4.43457995526414e-08,
"loss": 0.546,
"step": 4451
},
{
"epoch": 0.96,
"grad_norm": 0.15831370651721954,
"learning_rate": 4.3883339883846186e-08,
"loss": 0.4982,
"step": 4452
},
{
"epoch": 0.96,
"grad_norm": 0.16516351699829102,
"learning_rate": 4.342329362037767e-08,
"loss": 0.5072,
"step": 4453
},
{
"epoch": 0.96,
"grad_norm": 0.16760680079460144,
"learning_rate": 4.296566098625776e-08,
"loss": 0.4515,
"step": 4454
},
{
"epoch": 0.96,
"grad_norm": 0.12296677380800247,
"learning_rate": 4.25104422043332e-08,
"loss": 0.4929,
"step": 4455
},
{
"epoch": 0.96,
"grad_norm": 0.16518919169902802,
"learning_rate": 4.2057637496273896e-08,
"loss": 0.524,
"step": 4456
},
{
"epoch": 0.96,
"grad_norm": 0.17474794387817383,
"learning_rate": 4.16072470825768e-08,
"loss": 0.5345,
"step": 4457
},
{
"epoch": 0.96,
"grad_norm": 0.13814187049865723,
"learning_rate": 4.115927118256036e-08,
"loss": 0.5051,
"step": 4458
},
{
"epoch": 0.96,
"grad_norm": 0.1405845582485199,
"learning_rate": 4.071371001436952e-08,
"loss": 0.4459,
"step": 4459
},
{
"epoch": 0.96,
"grad_norm": 0.13282142579555511,
"learning_rate": 4.02705637949724e-08,
"loss": 0.4892,
"step": 4460
},
{
"epoch": 0.96,
"grad_norm": 0.17903049290180206,
"learning_rate": 3.9829832740160834e-08,
"loss": 0.5045,
"step": 4461
},
{
"epoch": 0.96,
"grad_norm": 0.1339827924966812,
"learning_rate": 3.939151706455146e-08,
"loss": 0.5043,
"step": 4462
},
{
"epoch": 0.96,
"grad_norm": 0.1924246847629547,
"learning_rate": 3.895561698158357e-08,
"loss": 0.4559,
"step": 4463
},
{
"epoch": 0.96,
"grad_norm": 0.1578565388917923,
"learning_rate": 3.8522132703521784e-08,
"loss": 0.5406,
"step": 4464
},
{
"epoch": 0.96,
"grad_norm": 0.17296722531318665,
"learning_rate": 3.809106444145228e-08,
"loss": 0.5006,
"step": 4465
},
{
"epoch": 0.96,
"grad_norm": 0.15273821353912354,
"learning_rate": 3.7662412405286567e-08,
"loss": 0.5106,
"step": 4466
},
{
"epoch": 0.96,
"grad_norm": 0.13537849485874176,
"learning_rate": 3.723617680375935e-08,
"loss": 0.5051,
"step": 4467
},
{
"epoch": 0.96,
"grad_norm": 0.14120222628116608,
"learning_rate": 3.6812357844427385e-08,
"loss": 0.5358,
"step": 4468
},
{
"epoch": 0.96,
"grad_norm": 0.1762859970331192,
"learning_rate": 3.639095573367168e-08,
"loss": 0.5097,
"step": 4469
},
{
"epoch": 0.96,
"grad_norm": 0.13967743515968323,
"learning_rate": 3.597197067669533e-08,
"loss": 0.5434,
"step": 4470
},
{
"epoch": 0.96,
"grad_norm": 0.18413011729717255,
"learning_rate": 3.555540287752568e-08,
"loss": 0.5334,
"step": 4471
},
{
"epoch": 0.96,
"grad_norm": 0.2260027378797531,
"learning_rate": 3.514125253901324e-08,
"loss": 0.5245,
"step": 4472
},
{
"epoch": 0.96,
"grad_norm": 0.14992649853229523,
"learning_rate": 3.4729519862829466e-08,
"loss": 0.4908,
"step": 4473
},
{
"epoch": 0.96,
"grad_norm": 0.14780190587043762,
"learning_rate": 3.432020504947064e-08,
"loss": 0.5405,
"step": 4474
},
{
"epoch": 0.96,
"grad_norm": 0.1981695592403412,
"learning_rate": 3.3913308298253456e-08,
"loss": 0.5227,
"step": 4475
},
{
"epoch": 0.96,
"grad_norm": 0.23844297230243683,
"learning_rate": 3.350882980731884e-08,
"loss": 0.578,
"step": 4476
},
{
"epoch": 0.96,
"grad_norm": 0.17352676391601562,
"learning_rate": 3.310676977362925e-08,
"loss": 0.5591,
"step": 4477
},
{
"epoch": 0.96,
"grad_norm": 0.15774790942668915,
"learning_rate": 3.27071283929703e-08,
"loss": 0.5255,
"step": 4478
},
{
"epoch": 0.96,
"grad_norm": 0.1580473780632019,
"learning_rate": 3.230990585994964e-08,
"loss": 0.5217,
"step": 4479
},
{
"epoch": 0.97,
"grad_norm": 0.16399511694908142,
"learning_rate": 3.191510236799589e-08,
"loss": 0.4837,
"step": 4480
},
{
"epoch": 0.97,
"grad_norm": 0.14228610694408417,
"learning_rate": 3.152271810936081e-08,
"loss": 0.5381,
"step": 4481
},
{
"epoch": 0.97,
"grad_norm": 0.16083598136901855,
"learning_rate": 3.113275327511767e-08,
"loss": 0.4836,
"step": 4482
},
{
"epoch": 0.97,
"grad_norm": 0.11880119889974594,
"learning_rate": 3.074520805516235e-08,
"loss": 0.497,
"step": 4483
},
{
"epoch": 0.97,
"grad_norm": 0.1469106525182724,
"learning_rate": 3.0360082638211666e-08,
"loss": 0.5033,
"step": 4484
},
{
"epoch": 0.97,
"grad_norm": 0.1515989750623703,
"learning_rate": 2.997737721180338e-08,
"loss": 0.5238,
"step": 4485
},
{
"epoch": 0.97,
"grad_norm": 0.16306115686893463,
"learning_rate": 2.959709196229954e-08,
"loss": 0.5491,
"step": 4486
},
{
"epoch": 0.97,
"grad_norm": 0.14227931201457977,
"learning_rate": 2.921922707488034e-08,
"loss": 0.5258,
"step": 4487
},
{
"epoch": 0.97,
"grad_norm": 0.13914653658866882,
"learning_rate": 2.8843782733549706e-08,
"loss": 0.525,
"step": 4488
},
{
"epoch": 0.97,
"grad_norm": 0.16191960871219635,
"learning_rate": 2.847075912113195e-08,
"loss": 0.491,
"step": 4489
},
{
"epoch": 0.97,
"grad_norm": 0.2092602699995041,
"learning_rate": 2.8100156419272885e-08,
"loss": 0.5296,
"step": 4490
},
{
"epoch": 0.97,
"grad_norm": 0.1607397496700287,
"learning_rate": 2.7731974808439256e-08,
"loss": 0.5445,
"step": 4491
},
{
"epoch": 0.97,
"grad_norm": 0.15962082147598267,
"learning_rate": 2.7366214467919318e-08,
"loss": 0.5054,
"step": 4492
},
{
"epoch": 0.97,
"grad_norm": 0.1509018987417221,
"learning_rate": 2.7002875575820598e-08,
"loss": 0.5188,
"step": 4493
},
{
"epoch": 0.97,
"grad_norm": 0.1463000327348709,
"learning_rate": 2.664195830907379e-08,
"loss": 0.5455,
"step": 4494
},
{
"epoch": 0.97,
"grad_norm": 0.15445829927921295,
"learning_rate": 2.628346284342942e-08,
"loss": 0.4282,
"step": 4495
},
{
"epoch": 0.97,
"grad_norm": 0.1485372930765152,
"learning_rate": 2.5927389353457842e-08,
"loss": 0.5097,
"step": 4496
},
{
"epoch": 0.97,
"grad_norm": 0.1508139967918396,
"learning_rate": 2.5573738012550918e-08,
"loss": 0.5222,
"step": 4497
},
{
"epoch": 0.97,
"grad_norm": 0.1392061859369278,
"learning_rate": 2.5222508992922e-08,
"loss": 0.4993,
"step": 4498
},
{
"epoch": 0.97,
"grad_norm": 0.18485471606254578,
"learning_rate": 2.4873702465602612e-08,
"loss": 0.556,
"step": 4499
},
{
"epoch": 0.97,
"grad_norm": 0.14879798889160156,
"learning_rate": 2.4527318600446324e-08,
"loss": 0.53,
"step": 4500
},
{
"epoch": 0.97,
"grad_norm": 0.1910664439201355,
"learning_rate": 2.4183357566125998e-08,
"loss": 0.5285,
"step": 4501
},
{
"epoch": 0.97,
"grad_norm": 0.13035574555397034,
"learning_rate": 2.3841819530135424e-08,
"loss": 0.4912,
"step": 4502
},
{
"epoch": 0.97,
"grad_norm": 0.15490761399269104,
"learning_rate": 2.350270465878879e-08,
"loss": 0.4842,
"step": 4503
},
{
"epoch": 0.97,
"grad_norm": 0.16533678770065308,
"learning_rate": 2.3166013117218998e-08,
"loss": 0.5493,
"step": 4504
},
{
"epoch": 0.97,
"grad_norm": 0.15088367462158203,
"learning_rate": 2.2831745069379907e-08,
"loss": 0.5552,
"step": 4505
},
{
"epoch": 0.97,
"grad_norm": 0.17368783056735992,
"learning_rate": 2.249990067804464e-08,
"loss": 0.5297,
"step": 4506
},
{
"epoch": 0.97,
"grad_norm": 0.20666424930095673,
"learning_rate": 2.2170480104807268e-08,
"loss": 0.4992,
"step": 4507
},
{
"epoch": 0.97,
"grad_norm": 0.17731893062591553,
"learning_rate": 2.1843483510080032e-08,
"loss": 0.4926,
"step": 4508
},
{
"epoch": 0.97,
"grad_norm": 0.15629667043685913,
"learning_rate": 2.151891105309556e-08,
"loss": 0.4927,
"step": 4509
},
{
"epoch": 0.97,
"grad_norm": 0.13626927137374878,
"learning_rate": 2.119676289190631e-08,
"loss": 0.4622,
"step": 4510
},
{
"epoch": 0.97,
"grad_norm": 0.14321957528591156,
"learning_rate": 2.0877039183384018e-08,
"loss": 0.4869,
"step": 4511
},
{
"epoch": 0.97,
"grad_norm": 0.15675011277198792,
"learning_rate": 2.0559740083219147e-08,
"loss": 0.4736,
"step": 4512
},
{
"epoch": 0.97,
"grad_norm": 0.14412085711956024,
"learning_rate": 2.024486574592255e-08,
"loss": 0.457,
"step": 4513
},
{
"epoch": 0.97,
"grad_norm": 0.15253007411956787,
"learning_rate": 1.9932416324823235e-08,
"loss": 0.5338,
"step": 4514
},
{
"epoch": 0.97,
"grad_norm": 0.23936396837234497,
"learning_rate": 1.9622391972071164e-08,
"loss": 0.5333,
"step": 4515
},
{
"epoch": 0.97,
"grad_norm": 0.16216091811656952,
"learning_rate": 1.93147928386328e-08,
"loss": 0.445,
"step": 4516
},
{
"epoch": 0.97,
"grad_norm": 0.15972132980823517,
"learning_rate": 1.9009619074296102e-08,
"loss": 0.5372,
"step": 4517
},
{
"epoch": 0.97,
"grad_norm": 0.13417501747608185,
"learning_rate": 1.8706870827666646e-08,
"loss": 0.4829,
"step": 4518
},
{
"epoch": 0.97,
"grad_norm": 0.184243842959404,
"learning_rate": 1.840654824616872e-08,
"loss": 0.5087,
"step": 4519
},
{
"epoch": 0.97,
"grad_norm": 0.14154835045337677,
"learning_rate": 1.8108651476046457e-08,
"loss": 0.5314,
"step": 4520
},
{
"epoch": 0.97,
"grad_norm": 0.14898306131362915,
"learning_rate": 1.781318066236215e-08,
"loss": 0.536,
"step": 4521
},
{
"epoch": 0.97,
"grad_norm": 0.14588691294193268,
"learning_rate": 1.7520135948996263e-08,
"loss": 0.5138,
"step": 4522
},
{
"epoch": 0.97,
"grad_norm": 0.19737721979618073,
"learning_rate": 1.722951747864854e-08,
"loss": 0.5161,
"step": 4523
},
{
"epoch": 0.97,
"grad_norm": 0.16627101600170135,
"learning_rate": 1.6941325392837437e-08,
"loss": 0.573,
"step": 4524
},
{
"epoch": 0.97,
"grad_norm": 0.16760136187076569,
"learning_rate": 1.6655559831899038e-08,
"loss": 0.5447,
"step": 4525
},
{
"epoch": 0.97,
"grad_norm": 0.16920168697834015,
"learning_rate": 1.6372220934988693e-08,
"loss": 0.5242,
"step": 4526
},
{
"epoch": 0.98,
"grad_norm": 0.1605527251958847,
"learning_rate": 1.609130884007881e-08,
"loss": 0.5065,
"step": 4527
},
{
"epoch": 0.98,
"grad_norm": 0.16007407009601593,
"learning_rate": 1.5812823683962198e-08,
"loss": 0.4823,
"step": 4528
},
{
"epoch": 0.98,
"grad_norm": 0.15912877023220062,
"learning_rate": 1.5536765602248148e-08,
"loss": 0.4905,
"step": 4529
},
{
"epoch": 0.98,
"grad_norm": 0.1717677265405655,
"learning_rate": 1.5263134729363582e-08,
"loss": 0.5239,
"step": 4530
},
{
"epoch": 0.98,
"grad_norm": 0.16128626465797424,
"learning_rate": 1.49919311985558e-08,
"loss": 0.5036,
"step": 4531
},
{
"epoch": 0.98,
"grad_norm": 0.15915502607822418,
"learning_rate": 1.472315514188749e-08,
"loss": 0.4892,
"step": 4532
},
{
"epoch": 0.98,
"grad_norm": 0.18323585391044617,
"learning_rate": 1.4456806690241187e-08,
"loss": 0.487,
"step": 4533
},
{
"epoch": 0.98,
"grad_norm": 0.16480879485607147,
"learning_rate": 1.4192885973315918e-08,
"loss": 0.5064,
"step": 4534
},
{
"epoch": 0.98,
"grad_norm": 0.15716604888439178,
"learning_rate": 1.3931393119629987e-08,
"loss": 0.4886,
"step": 4535
},
{
"epoch": 0.98,
"grad_norm": 0.1497613936662674,
"learning_rate": 1.3672328256518208e-08,
"loss": 0.5372,
"step": 4536
},
{
"epoch": 0.98,
"grad_norm": 0.14100497961044312,
"learning_rate": 1.3415691510133555e-08,
"loss": 0.4763,
"step": 4537
},
{
"epoch": 0.98,
"grad_norm": 0.16514605283737183,
"learning_rate": 1.3161483005446618e-08,
"loss": 0.5506,
"step": 4538
},
{
"epoch": 0.98,
"grad_norm": 0.17945894598960876,
"learning_rate": 1.2909702866245045e-08,
"loss": 0.5278,
"step": 4539
},
{
"epoch": 0.98,
"grad_norm": 0.13528050482273102,
"learning_rate": 1.2660351215135203e-08,
"loss": 0.4926,
"step": 4540
},
{
"epoch": 0.98,
"grad_norm": 0.1794215887784958,
"learning_rate": 1.241342817353941e-08,
"loss": 0.4929,
"step": 4541
},
{
"epoch": 0.98,
"grad_norm": 0.14071007072925568,
"learning_rate": 1.2168933861698151e-08,
"loss": 0.502,
"step": 4542
},
{
"epoch": 0.98,
"grad_norm": 0.16157187521457672,
"learning_rate": 1.1926868398669522e-08,
"loss": 0.5321,
"step": 4543
},
{
"epoch": 0.98,
"grad_norm": 0.15778125822544098,
"learning_rate": 1.1687231902328122e-08,
"loss": 0.4767,
"step": 4544
},
{
"epoch": 0.98,
"grad_norm": 0.16504241526126862,
"learning_rate": 1.1450024489366163e-08,
"loss": 0.5074,
"step": 4545
},
{
"epoch": 0.98,
"grad_norm": 0.16821053624153137,
"learning_rate": 1.1215246275292913e-08,
"loss": 0.5654,
"step": 4546
},
{
"epoch": 0.98,
"grad_norm": 0.18481959402561188,
"learning_rate": 1.0982897374435252e-08,
"loss": 0.5467,
"step": 4547
},
{
"epoch": 0.98,
"grad_norm": 0.18275012075901031,
"learning_rate": 1.0752977899936013e-08,
"loss": 0.5656,
"step": 4548
},
{
"epoch": 0.98,
"grad_norm": 0.15175531804561615,
"learning_rate": 1.0525487963756186e-08,
"loss": 0.5455,
"step": 4549
},
{
"epoch": 0.98,
"grad_norm": 0.14397265017032623,
"learning_rate": 1.0300427676672164e-08,
"loss": 0.5424,
"step": 4550
},
{
"epoch": 0.98,
"grad_norm": 0.14812220633029938,
"learning_rate": 1.0077797148279056e-08,
"loss": 0.5515,
"step": 4551
},
{
"epoch": 0.98,
"grad_norm": 0.15202391147613525,
"learning_rate": 9.85759648698792e-09,
"loss": 0.5168,
"step": 4552
},
{
"epoch": 0.98,
"grad_norm": 0.221909299492836,
"learning_rate": 9.63982580002576e-09,
"loss": 0.5585,
"step": 4553
},
{
"epoch": 0.98,
"grad_norm": 0.1755637228488922,
"learning_rate": 9.42448519343775e-09,
"loss": 0.4478,
"step": 4554
},
{
"epoch": 0.98,
"grad_norm": 0.2023898959159851,
"learning_rate": 9.211574772085009e-09,
"loss": 0.5461,
"step": 4555
},
{
"epoch": 0.98,
"grad_norm": 0.26978346705436707,
"learning_rate": 9.001094639645158e-09,
"loss": 0.5124,
"step": 4556
},
{
"epoch": 0.98,
"grad_norm": 0.42439812421798706,
"learning_rate": 8.793044898612324e-09,
"loss": 0.4763,
"step": 4557
},
{
"epoch": 0.98,
"grad_norm": 0.1549844741821289,
"learning_rate": 8.587425650297688e-09,
"loss": 0.5193,
"step": 4558
},
{
"epoch": 0.98,
"grad_norm": 0.16406425833702087,
"learning_rate": 8.384236994828376e-09,
"loss": 0.5499,
"step": 4559
},
{
"epoch": 0.98,
"grad_norm": 0.15720784664154053,
"learning_rate": 8.183479031148022e-09,
"loss": 0.5331,
"step": 4560
},
{
"epoch": 0.98,
"grad_norm": 0.13800294697284698,
"learning_rate": 7.98515185701676e-09,
"loss": 0.493,
"step": 4561
},
{
"epoch": 0.98,
"grad_norm": 0.17194029688835144,
"learning_rate": 7.789255569011223e-09,
"loss": 0.5432,
"step": 4562
},
{
"epoch": 0.98,
"grad_norm": 0.13786643743515015,
"learning_rate": 7.595790262523995e-09,
"loss": 0.5465,
"step": 4563
},
{
"epoch": 0.98,
"grad_norm": 0.13633519411087036,
"learning_rate": 7.40475603176416e-09,
"loss": 0.4532,
"step": 4564
},
{
"epoch": 0.98,
"grad_norm": 0.1479789763689041,
"learning_rate": 7.216152969755641e-09,
"loss": 0.459,
"step": 4565
},
{
"epoch": 0.98,
"grad_norm": 0.15188181400299072,
"learning_rate": 7.029981168341082e-09,
"loss": 0.5184,
"step": 4566
},
{
"epoch": 0.98,
"grad_norm": 0.19617387652397156,
"learning_rate": 6.846240718176855e-09,
"loss": 0.4477,
"step": 4567
},
{
"epoch": 0.98,
"grad_norm": 0.1465783268213272,
"learning_rate": 6.664931708736943e-09,
"loss": 0.5164,
"step": 4568
},
{
"epoch": 0.98,
"grad_norm": 0.17284545302391052,
"learning_rate": 6.486054228309613e-09,
"loss": 0.5033,
"step": 4569
},
{
"epoch": 0.98,
"grad_norm": 0.1475616693496704,
"learning_rate": 6.309608364001296e-09,
"loss": 0.5787,
"step": 4570
},
{
"epoch": 0.98,
"grad_norm": 0.21322426199913025,
"learning_rate": 6.1355942017321534e-09,
"loss": 0.5463,
"step": 4571
},
{
"epoch": 0.98,
"grad_norm": 0.13938239216804504,
"learning_rate": 5.9640118262399575e-09,
"loss": 0.4885,
"step": 4572
},
{
"epoch": 0.99,
"grad_norm": 0.14598731696605682,
"learning_rate": 5.794861321077872e-09,
"loss": 0.5078,
"step": 4573
},
{
"epoch": 0.99,
"grad_norm": 0.13491961359977722,
"learning_rate": 5.628142768613343e-09,
"loss": 0.5065,
"step": 4574
},
{
"epoch": 0.99,
"grad_norm": 0.1279793381690979,
"learning_rate": 5.4638562500319844e-09,
"loss": 0.5256,
"step": 4575
},
{
"epoch": 0.99,
"grad_norm": 0.12754946947097778,
"learning_rate": 5.302001845333138e-09,
"loss": 0.5323,
"step": 4576
},
{
"epoch": 0.99,
"grad_norm": 0.1419224590063095,
"learning_rate": 5.1425796333332e-09,
"loss": 0.5048,
"step": 4577
},
{
"epoch": 0.99,
"grad_norm": 0.14779268205165863,
"learning_rate": 4.9855896916634065e-09,
"loss": 0.5018,
"step": 4578
},
{
"epoch": 0.99,
"grad_norm": 0.15771281719207764,
"learning_rate": 4.831032096770383e-09,
"loss": 0.54,
"step": 4579
},
{
"epoch": 0.99,
"grad_norm": 0.14732089638710022,
"learning_rate": 4.678906923916704e-09,
"loss": 0.5594,
"step": 4580
},
{
"epoch": 0.99,
"grad_norm": 0.13935159146785736,
"learning_rate": 4.529214247181446e-09,
"loss": 0.4833,
"step": 4581
},
{
"epoch": 0.99,
"grad_norm": 0.1624104082584381,
"learning_rate": 4.381954139457411e-09,
"loss": 0.5112,
"step": 4582
},
{
"epoch": 0.99,
"grad_norm": 0.14294303953647614,
"learning_rate": 4.237126672453351e-09,
"loss": 0.5499,
"step": 4583
},
{
"epoch": 0.99,
"grad_norm": 0.16533686220645905,
"learning_rate": 4.094731916693962e-09,
"loss": 0.5159,
"step": 4584
},
{
"epoch": 0.99,
"grad_norm": 0.14635160565376282,
"learning_rate": 3.9547699415198874e-09,
"loss": 0.5057,
"step": 4585
},
{
"epoch": 0.99,
"grad_norm": 0.149323508143425,
"learning_rate": 3.817240815084944e-09,
"loss": 0.4638,
"step": 4586
},
{
"epoch": 0.99,
"grad_norm": 0.19318562746047974,
"learning_rate": 3.68214460436056e-09,
"loss": 0.5073,
"step": 4587
},
{
"epoch": 0.99,
"grad_norm": 0.139207124710083,
"learning_rate": 3.5494813751324466e-09,
"loss": 0.5214,
"step": 4588
},
{
"epoch": 0.99,
"grad_norm": 0.16351597011089325,
"learning_rate": 3.4192511920011495e-09,
"loss": 0.548,
"step": 4589
},
{
"epoch": 0.99,
"grad_norm": 0.21700578927993774,
"learning_rate": 3.291454118383164e-09,
"loss": 0.5148,
"step": 4590
},
{
"epoch": 0.99,
"grad_norm": 0.1478326916694641,
"learning_rate": 3.1660902165098205e-09,
"loss": 0.5089,
"step": 4591
},
{
"epoch": 0.99,
"grad_norm": 0.13109348714351654,
"learning_rate": 3.043159547427843e-09,
"loss": 0.5693,
"step": 4592
},
{
"epoch": 0.99,
"grad_norm": 0.18486939370632172,
"learning_rate": 2.922662170998791e-09,
"loss": 0.5101,
"step": 4593
},
{
"epoch": 0.99,
"grad_norm": 0.23270238935947418,
"learning_rate": 2.804598145899062e-09,
"loss": 0.5173,
"step": 4594
},
{
"epoch": 0.99,
"grad_norm": 0.13426940143108368,
"learning_rate": 2.688967529621556e-09,
"loss": 0.5174,
"step": 4595
},
{
"epoch": 0.99,
"grad_norm": 0.1617153435945511,
"learning_rate": 2.575770378472342e-09,
"loss": 0.509,
"step": 4596
},
{
"epoch": 0.99,
"grad_norm": 0.17806501686573029,
"learning_rate": 2.4650067475734398e-09,
"loss": 0.5115,
"step": 4597
},
{
"epoch": 0.99,
"grad_norm": 0.16140803694725037,
"learning_rate": 2.3566766908622586e-09,
"loss": 0.5718,
"step": 4598
},
{
"epoch": 0.99,
"grad_norm": 0.1904280185699463,
"learning_rate": 2.25078026108938e-09,
"loss": 0.4756,
"step": 4599
},
{
"epoch": 0.99,
"grad_norm": 0.154715433716774,
"learning_rate": 2.1473175098229993e-09,
"loss": 0.5476,
"step": 4600
},
{
"epoch": 0.99,
"grad_norm": 0.13388168811798096,
"learning_rate": 2.046288487444481e-09,
"loss": 0.5101,
"step": 4601
},
{
"epoch": 0.99,
"grad_norm": 0.15827472507953644,
"learning_rate": 1.9476932431500286e-09,
"loss": 0.5506,
"step": 4602
},
{
"epoch": 0.99,
"grad_norm": 0.1361783891916275,
"learning_rate": 1.8515318249506809e-09,
"loss": 0.4876,
"step": 4603
},
{
"epoch": 0.99,
"grad_norm": 0.13444480299949646,
"learning_rate": 1.7578042796739803e-09,
"loss": 0.5557,
"step": 4604
},
{
"epoch": 0.99,
"grad_norm": 0.14045651257038116,
"learning_rate": 1.666510652960085e-09,
"loss": 0.533,
"step": 4605
},
{
"epoch": 0.99,
"grad_norm": 0.16582362353801727,
"learning_rate": 1.5776509892645453e-09,
"loss": 0.4909,
"step": 4606
},
{
"epoch": 0.99,
"grad_norm": 0.18087778985500336,
"learning_rate": 1.4912253318594138e-09,
"loss": 0.5907,
"step": 4607
},
{
"epoch": 0.99,
"grad_norm": 0.1612243801355362,
"learning_rate": 1.4072337228282496e-09,
"loss": 0.489,
"step": 4608
},
{
"epoch": 0.99,
"grad_norm": 0.1329774111509323,
"learning_rate": 1.3256762030727788e-09,
"loss": 0.4982,
"step": 4609
},
{
"epoch": 0.99,
"grad_norm": 0.15983089804649353,
"learning_rate": 1.2465528123073445e-09,
"loss": 0.554,
"step": 4610
},
{
"epoch": 0.99,
"grad_norm": 0.14717040956020355,
"learning_rate": 1.1698635890611264e-09,
"loss": 0.5216,
"step": 4611
},
{
"epoch": 0.99,
"grad_norm": 0.1585836112499237,
"learning_rate": 1.0956085706781416e-09,
"loss": 0.5291,
"step": 4612
},
{
"epoch": 0.99,
"grad_norm": 0.1522228866815567,
"learning_rate": 1.0237877933183538e-09,
"loss": 0.4878,
"step": 4613
},
{
"epoch": 0.99,
"grad_norm": 0.1510564535856247,
"learning_rate": 9.54401291953788e-10,
"loss": 0.454,
"step": 4614
},
{
"epoch": 0.99,
"grad_norm": 0.15678934752941132,
"learning_rate": 8.874491003735275e-10,
"loss": 0.4875,
"step": 4615
},
{
"epoch": 0.99,
"grad_norm": 0.14470191299915314,
"learning_rate": 8.229312511803811e-10,
"loss": 0.5576,
"step": 4616
},
{
"epoch": 0.99,
"grad_norm": 0.14706255495548248,
"learning_rate": 7.60847775790885e-10,
"loss": 0.5316,
"step": 4617
},
{
"epoch": 0.99,
"grad_norm": 0.18483339250087738,
"learning_rate": 7.011987044369673e-10,
"loss": 0.5227,
"step": 4618
},
{
"epoch": 0.99,
"grad_norm": 0.15726877748966217,
"learning_rate": 6.43984066165948e-10,
"loss": 0.4667,
"step": 4619
},
{
"epoch": 1.0,
"grad_norm": 0.17411313951015472,
"learning_rate": 5.892038888377638e-10,
"loss": 0.5399,
"step": 4620
},
{
"epoch": 1.0,
"grad_norm": 0.12193353474140167,
"learning_rate": 5.368581991282983e-10,
"loss": 0.5359,
"step": 4621
},
{
"epoch": 1.0,
"grad_norm": 0.1844024807214737,
"learning_rate": 4.869470225277174e-10,
"loss": 0.5551,
"step": 4622
},
{
"epoch": 1.0,
"grad_norm": 0.144753098487854,
"learning_rate": 4.3947038334046785e-10,
"loss": 0.5273,
"step": 4623
},
{
"epoch": 1.0,
"grad_norm": 0.18550604581832886,
"learning_rate": 3.9442830468472414e-10,
"loss": 0.5496,
"step": 4624
},
{
"epoch": 1.0,
"grad_norm": 0.1372082531452179,
"learning_rate": 3.5182080849516245e-10,
"loss": 0.4912,
"step": 4625
},
{
"epoch": 1.0,
"grad_norm": 0.16977030038833618,
"learning_rate": 3.1164791551907545e-10,
"loss": 0.4574,
"step": 4626
},
{
"epoch": 1.0,
"grad_norm": 0.15275530517101288,
"learning_rate": 2.739096453191481e-10,
"loss": 0.5382,
"step": 4627
},
{
"epoch": 1.0,
"grad_norm": 0.1399284303188324,
"learning_rate": 2.386060162717918e-10,
"loss": 0.5144,
"step": 4628
},
{
"epoch": 1.0,
"grad_norm": 0.18410004675388336,
"learning_rate": 2.05737045568255e-10,
"loss": 0.5252,
"step": 4629
},
{
"epoch": 1.0,
"grad_norm": 0.16072387993335724,
"learning_rate": 1.7530274921462308e-10,
"loss": 0.5003,
"step": 4630
},
{
"epoch": 1.0,
"grad_norm": 0.18865381181240082,
"learning_rate": 1.4730314203126318e-10,
"loss": 0.5048,
"step": 4631
},
{
"epoch": 1.0,
"grad_norm": 0.1395450234413147,
"learning_rate": 1.21738237651714e-10,
"loss": 0.4849,
"step": 4632
},
{
"epoch": 1.0,
"grad_norm": 0.13772456347942352,
"learning_rate": 9.860804852601658e-11,
"loss": 0.5158,
"step": 4633
},
{
"epoch": 1.0,
"grad_norm": 0.13184866309165955,
"learning_rate": 7.791258591682837e-11,
"loss": 0.526,
"step": 4634
},
{
"epoch": 1.0,
"grad_norm": 0.15045422315597534,
"learning_rate": 5.96518599021989e-11,
"loss": 0.5058,
"step": 4635
},
{
"epoch": 1.0,
"grad_norm": 0.15769906342029572,
"learning_rate": 4.382587937445948e-11,
"loss": 0.5534,
"step": 4636
},
{
"epoch": 1.0,
"grad_norm": 0.172768235206604,
"learning_rate": 3.0434652039668114e-11,
"loss": 0.5297,
"step": 4637
},
{
"epoch": 1.0,
"grad_norm": 0.13954681158065796,
"learning_rate": 1.947818441927485e-11,
"loss": 0.5442,
"step": 4638
},
{
"epoch": 1.0,
"grad_norm": 0.12631604075431824,
"learning_rate": 1.0956481847901323e-11,
"loss": 0.5348,
"step": 4639
},
{
"epoch": 1.0,
"grad_norm": 0.14625275135040283,
"learning_rate": 4.8695484761163145e-12,
"loss": 0.5268,
"step": 4640
},
{
"epoch": 1.0,
"grad_norm": 0.16151678562164307,
"learning_rate": 1.2173872671050746e-12,
"loss": 0.5801,
"step": 4641
},
{
"epoch": 1.0,
"grad_norm": 0.16961205005645752,
"learning_rate": 0.0,
"loss": 0.5504,
"step": 4642
},
{
"epoch": 1.0,
"step": 4642,
"total_flos": 1.0981284140833309e+19,
"train_loss": 0.5198679990626676,
"train_runtime": 67897.289,
"train_samples_per_second": 17.504,
"train_steps_per_second": 0.068
}
],
"logging_steps": 1.0,
"max_steps": 4642,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 8000,
"total_flos": 1.0981284140833309e+19,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}