{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8806026624471123,
  "eval_steps": 200,
  "global_step": 3200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0005503766640294451,
      "grad_norm": 2.2015435695648193,
      "learning_rate": 1.4775011317868612e-06,
      "loss": 0.7802,
      "step": 2
    },
    {
      "epoch": 0.0011007533280588903,
      "grad_norm": 2.0623114109039307,
      "learning_rate": 2.9550022635737224e-06,
      "loss": 0.6659,
      "step": 4
    },
    {
      "epoch": 0.0016511299920883354,
      "grad_norm": 0.8444932699203491,
      "learning_rate": 3.819285020442103e-06,
      "loss": 0.6275,
      "step": 6
    },
    {
      "epoch": 0.0022015066561177805,
      "grad_norm": 0.7291238307952881,
      "learning_rate": 4.432503395360583e-06,
      "loss": 0.5955,
      "step": 8
    },
    {
      "epoch": 0.0027518833201472257,
      "grad_norm": 0.591098427772522,
      "learning_rate": 4.90815251991065e-06,
      "loss": 0.5906,
      "step": 10
    },
    {
      "epoch": 0.003302259984176671,
      "grad_norm": 0.6075527667999268,
      "learning_rate": 5.2967861522289644e-06,
      "loss": 0.5451,
      "step": 12
    },
    {
      "epoch": 0.003852636648206116,
      "grad_norm": 0.5598031878471375,
      "learning_rate": 5.625371206454386e-06,
      "loss": 0.5539,
      "step": 14
    },
    {
      "epoch": 0.004403013312235561,
      "grad_norm": 0.5352339148521423,
      "learning_rate": 5.910004527147445e-06,
      "loss": 0.5452,
      "step": 16
    },
    {
      "epoch": 0.004953389976265006,
      "grad_norm": 0.524741530418396,
      "learning_rate": 6.161068909097345e-06,
      "loss": 0.5536,
      "step": 18
    },
    {
      "epoch": 0.005503766640294451,
      "grad_norm": 0.4852159321308136,
      "learning_rate": 6.38565365169751e-06,
      "loss": 0.5439,
      "step": 20
    },
    {
      "epoch": 0.0060541433043238965,
      "grad_norm": 0.4764852225780487,
      "learning_rate": 6.5888152636627215e-06,
      "loss": 0.5468,
      "step": 22
    },
    {
      "epoch": 0.006604519968353342,
      "grad_norm": 0.463278591632843,
      "learning_rate": 6.774287284015826e-06,
      "loss": 0.541,
      "step": 24
    },
    {
      "epoch": 0.007154896632382787,
      "grad_norm": 0.4566305875778198,
      "learning_rate": 6.944905003449378e-06,
      "loss": 0.5258,
      "step": 26
    },
    {
      "epoch": 0.007705273296412232,
      "grad_norm": 0.4572094678878784,
      "learning_rate": 7.102872338241248e-06,
      "loss": 0.5385,
      "step": 28
    },
    {
      "epoch": 0.008255649960441678,
      "grad_norm": 0.4581094980239868,
      "learning_rate": 7.2499364085658915e-06,
      "loss": 0.5258,
      "step": 30
    },
    {
      "epoch": 0.008806026624471122,
      "grad_norm": 0.4602491557598114,
      "learning_rate": 7.387505658934305e-06,
      "loss": 0.5239,
      "step": 32
    },
    {
      "epoch": 0.009356403288500568,
      "grad_norm": 0.4633028507232666,
      "learning_rate": 7.516732105870977e-06,
      "loss": 0.5237,
      "step": 34
    },
    {
      "epoch": 0.009906779952530012,
      "grad_norm": 0.4267115592956543,
      "learning_rate": 7.638570040884206e-06,
      "loss": 0.5467,
      "step": 36
    },
    {
      "epoch": 0.010457156616559458,
      "grad_norm": 0.4587521255016327,
      "learning_rate": 7.753818840648305e-06,
      "loss": 0.5282,
      "step": 38
    },
    {
      "epoch": 0.011007533280588903,
      "grad_norm": 0.44529175758361816,
      "learning_rate": 7.863154783484372e-06,
      "loss": 0.536,
      "step": 40
    },
    {
      "epoch": 0.011557909944618349,
      "grad_norm": 0.436199814081192,
      "learning_rate": 7.967155095109629e-06,
      "loss": 0.5259,
      "step": 42
    },
    {
      "epoch": 0.012108286608647793,
      "grad_norm": 0.43157511949539185,
      "learning_rate": 8.066316395449581e-06,
      "loss": 0.5173,
      "step": 44
    },
    {
      "epoch": 0.012658663272677239,
      "grad_norm": 0.4393196105957031,
      "learning_rate": 8.161069041569085e-06,
      "loss": 0.5037,
      "step": 46
    },
    {
      "epoch": 0.013209039936706683,
      "grad_norm": 0.5085024237632751,
      "learning_rate": 8.251788415802687e-06,
      "loss": 0.5015,
      "step": 48
    },
    {
      "epoch": 0.01375941660073613,
      "grad_norm": 0.40056705474853516,
      "learning_rate": 8.338803908034438e-06,
      "loss": 0.5194,
      "step": 50
    },
    {
      "epoch": 0.014309793264765574,
      "grad_norm": 0.43617382645606995,
      "learning_rate": 8.422406135236239e-06,
      "loss": 0.5327,
      "step": 52
    },
    {
      "epoch": 0.01486016992879502,
      "grad_norm": 0.42528873682022095,
      "learning_rate": 8.502852797752587e-06,
      "loss": 0.5245,
      "step": 54
    },
    {
      "epoch": 0.015410546592824464,
      "grad_norm": 0.40340831875801086,
      "learning_rate": 8.58037347002811e-06,
      "loss": 0.5059,
      "step": 56
    },
    {
      "epoch": 0.015960923256853908,
      "grad_norm": 0.40859195590019226,
      "learning_rate": 8.65517355028691e-06,
      "loss": 0.5136,
      "step": 58
    },
    {
      "epoch": 0.016511299920883356,
      "grad_norm": 0.42262887954711914,
      "learning_rate": 8.727437540352753e-06,
      "loss": 0.5011,
      "step": 60
    },
    {
      "epoch": 0.0170616765849128,
      "grad_norm": 0.4255228638648987,
      "learning_rate": 8.79733178747776e-06,
      "loss": 0.5218,
      "step": 62
    },
    {
      "epoch": 0.017612053248942244,
      "grad_norm": 0.4350854754447937,
      "learning_rate": 8.865006790721166e-06,
      "loss": 0.5334,
      "step": 64
    },
    {
      "epoch": 0.01816242991297169,
      "grad_norm": 0.41395291686058044,
      "learning_rate": 8.930599152317962e-06,
      "loss": 0.5233,
      "step": 66
    },
    {
      "epoch": 0.018712806577001136,
      "grad_norm": 0.4127484858036041,
      "learning_rate": 8.99423323765784e-06,
      "loss": 0.5143,
      "step": 68
    },
    {
      "epoch": 0.01926318324103058,
      "grad_norm": 0.42464280128479004,
      "learning_rate": 9.056022594578175e-06,
      "loss": 0.5164,
      "step": 70
    },
    {
      "epoch": 0.019813559905060025,
      "grad_norm": 0.4011682868003845,
      "learning_rate": 9.116071172671068e-06,
      "loss": 0.5036,
      "step": 72
    },
    {
      "epoch": 0.02036393656908947,
      "grad_norm": 0.39912551641464233,
      "learning_rate": 9.174474375494509e-06,
      "loss": 0.5038,
      "step": 74
    },
    {
      "epoch": 0.020914313233118917,
      "grad_norm": 0.40526625514030457,
      "learning_rate": 9.231319972435167e-06,
      "loss": 0.518,
      "step": 76
    },
    {
      "epoch": 0.02146468989714836,
      "grad_norm": 0.4195484220981598,
      "learning_rate": 9.28668889210462e-06,
      "loss": 0.5065,
      "step": 78
    },
    {
      "epoch": 0.022015066561177805,
      "grad_norm": 0.4483351409435272,
      "learning_rate": 9.340655915271231e-06,
      "loss": 0.5128,
      "step": 80
    },
    {
      "epoch": 0.02256544322520725,
      "grad_norm": 0.42789894342422485,
      "learning_rate": 9.393290282217048e-06,
      "loss": 0.525,
      "step": 82
    },
    {
      "epoch": 0.023115819889236697,
      "grad_norm": 0.434644490480423,
      "learning_rate": 9.444656226896488e-06,
      "loss": 0.5248,
      "step": 84
    },
    {
      "epoch": 0.02366619655326614,
      "grad_norm": 0.4532856345176697,
      "learning_rate": 9.494813448234365e-06,
      "loss": 0.5226,
      "step": 86
    },
    {
      "epoch": 0.024216573217295586,
      "grad_norm": 0.4103749692440033,
      "learning_rate": 9.543817527236444e-06,
      "loss": 0.5034,
      "step": 88
    },
    {
      "epoch": 0.02476694988132503,
      "grad_norm": 0.4208613336086273,
      "learning_rate": 9.591720297221133e-06,
      "loss": 0.5214,
      "step": 90
    },
    {
      "epoch": 0.025317326545354478,
      "grad_norm": 0.4020327627658844,
      "learning_rate": 9.638570173355947e-06,
      "loss": 0.5047,
      "step": 92
    },
    {
      "epoch": 0.025867703209383922,
      "grad_norm": 0.4074559509754181,
      "learning_rate": 9.684412446751251e-06,
      "loss": 0.4999,
      "step": 94
    },
    {
      "epoch": 0.026418079873413366,
      "grad_norm": 0.43330731987953186,
      "learning_rate": 9.729289547589548e-06,
      "loss": 0.5089,
      "step": 96
    },
    {
      "epoch": 0.02696845653744281,
      "grad_norm": 0.42775431275367737,
      "learning_rate": 9.773241281121913e-06,
      "loss": 0.5169,
      "step": 98
    },
    {
      "epoch": 0.02751883320147226,
      "grad_norm": 0.421403706073761,
      "learning_rate": 9.8163050398213e-06,
      "loss": 0.5123,
      "step": 100
    },
    {
      "epoch": 0.028069209865501703,
      "grad_norm": 0.42337778210639954,
      "learning_rate": 9.858515994526218e-06,
      "loss": 0.5116,
      "step": 102
    },
    {
      "epoch": 0.028619586529531147,
      "grad_norm": 0.4156826138496399,
      "learning_rate": 9.8999072670231e-06,
      "loss": 0.5077,
      "step": 104
    },
    {
      "epoch": 0.02916996319356059,
      "grad_norm": 0.4544354975223541,
      "learning_rate": 9.9405100861891e-06,
      "loss": 0.5099,
      "step": 106
    },
    {
      "epoch": 0.02972033985759004,
      "grad_norm": 0.4015970528125763,
      "learning_rate": 9.980353929539448e-06,
      "loss": 0.5049,
      "step": 108
    },
    {
      "epoch": 0.030270716521619483,
      "grad_norm": 0.3907098174095154,
      "learning_rate": 1e-05,
      "loss": 0.5202,
      "step": 110
    },
    {
      "epoch": 0.030821093185648928,
      "grad_norm": 0.4184499979019165,
      "learning_rate": 1e-05,
      "loss": 0.5085,
      "step": 112
    },
    {
      "epoch": 0.031371469849678375,
      "grad_norm": 0.47195565700531006,
      "learning_rate": 1e-05,
      "loss": 0.5161,
      "step": 114
    },
    {
      "epoch": 0.031921846513707816,
      "grad_norm": 0.43992695212364197,
      "learning_rate": 1e-05,
      "loss": 0.4978,
      "step": 116
    },
    {
      "epoch": 0.032472223177737264,
      "grad_norm": 0.43099331855773926,
      "learning_rate": 1e-05,
      "loss": 0.5035,
      "step": 118
    },
    {
      "epoch": 0.03302259984176671,
      "grad_norm": 0.44256317615509033,
      "learning_rate": 1e-05,
      "loss": 0.4991,
      "step": 120
    },
    {
      "epoch": 0.03357297650579615,
      "grad_norm": 0.42082124948501587,
      "learning_rate": 1e-05,
      "loss": 0.5028,
      "step": 122
    },
    {
      "epoch": 0.0341233531698256,
      "grad_norm": 0.38576358556747437,
      "learning_rate": 1e-05,
      "loss": 0.5081,
      "step": 124
    },
    {
      "epoch": 0.03467372983385505,
      "grad_norm": 0.3880733251571655,
      "learning_rate": 1e-05,
      "loss": 0.5001,
      "step": 126
    },
    {
      "epoch": 0.03522410649788449,
      "grad_norm": 0.41802075505256653,
      "learning_rate": 1e-05,
      "loss": 0.5056,
      "step": 128
    },
    {
      "epoch": 0.035774483161913936,
      "grad_norm": 0.3949527144432068,
      "learning_rate": 1e-05,
      "loss": 0.5155,
      "step": 130
    },
    {
      "epoch": 0.03632485982594338,
      "grad_norm": 0.4038969576358795,
      "learning_rate": 1e-05,
      "loss": 0.5056,
      "step": 132
    },
    {
      "epoch": 0.036875236489972825,
      "grad_norm": 0.40195325016975403,
      "learning_rate": 1e-05,
      "loss": 0.4968,
      "step": 134
    },
    {
      "epoch": 0.03742561315400227,
      "grad_norm": 0.3946043848991394,
      "learning_rate": 1e-05,
      "loss": 0.4981,
      "step": 136
    },
    {
      "epoch": 0.037975989818031713,
      "grad_norm": 0.3914756774902344,
      "learning_rate": 1e-05,
      "loss": 0.5,
      "step": 138
    },
    {
      "epoch": 0.03852636648206116,
      "grad_norm": 0.4295148551464081,
      "learning_rate": 1e-05,
      "loss": 0.5147,
      "step": 140
    },
    {
      "epoch": 0.03907674314609061,
      "grad_norm": 0.40092742443084717,
      "learning_rate": 1e-05,
      "loss": 0.5196,
      "step": 142
    },
    {
      "epoch": 0.03962711981012005,
      "grad_norm": 0.41200628876686096,
      "learning_rate": 1e-05,
      "loss": 0.5031,
      "step": 144
    },
    {
      "epoch": 0.0401774964741495,
      "grad_norm": 0.43834391236305237,
      "learning_rate": 1e-05,
      "loss": 0.5047,
      "step": 146
    },
    {
      "epoch": 0.04072787313817894,
      "grad_norm": 0.3940436542034149,
      "learning_rate": 1e-05,
      "loss": 0.4912,
      "step": 148
    },
    {
      "epoch": 0.041278249802208386,
      "grad_norm": 0.3873765170574188,
      "learning_rate": 1e-05,
      "loss": 0.482,
      "step": 150
    },
    {
      "epoch": 0.041828626466237834,
      "grad_norm": 0.4272858798503876,
      "learning_rate": 1e-05,
      "loss": 0.4923,
      "step": 152
    },
    {
      "epoch": 0.042379003130267275,
      "grad_norm": 0.40542730689048767,
      "learning_rate": 1e-05,
      "loss": 0.4892,
      "step": 154
    },
    {
      "epoch": 0.04292937979429672,
      "grad_norm": 0.38277357816696167,
      "learning_rate": 1e-05,
      "loss": 0.517,
      "step": 156
    },
    {
      "epoch": 0.04347975645832617,
      "grad_norm": 0.39421385526657104,
      "learning_rate": 1e-05,
      "loss": 0.503,
      "step": 158
    },
    {
      "epoch": 0.04403013312235561,
      "grad_norm": 0.3984109163284302,
      "learning_rate": 1e-05,
      "loss": 0.5074,
      "step": 160
    },
    {
      "epoch": 0.04458050978638506,
      "grad_norm": 0.40513876080513,
      "learning_rate": 1e-05,
      "loss": 0.5092,
      "step": 162
    },
    {
      "epoch": 0.0451308864504145,
      "grad_norm": 0.45850449800491333,
      "learning_rate": 1e-05,
      "loss": 0.5086,
      "step": 164
    },
    {
      "epoch": 0.04568126311444395,
      "grad_norm": 0.4050631821155548,
      "learning_rate": 1e-05,
      "loss": 0.5073,
      "step": 166
    },
    {
      "epoch": 0.046231639778473395,
      "grad_norm": 0.41050952672958374,
      "learning_rate": 1e-05,
      "loss": 0.5007,
      "step": 168
    },
    {
      "epoch": 0.046782016442502836,
      "grad_norm": 0.39902788400650024,
      "learning_rate": 1e-05,
      "loss": 0.4941,
      "step": 170
    },
    {
      "epoch": 0.04733239310653228,
      "grad_norm": 0.4421572983264923,
      "learning_rate": 1e-05,
      "loss": 0.4988,
      "step": 172
    },
    {
      "epoch": 0.04788276977056173,
      "grad_norm": 0.4092646837234497,
      "learning_rate": 1e-05,
      "loss": 0.5001,
      "step": 174
    },
    {
      "epoch": 0.04843314643459117,
      "grad_norm": 0.4195966124534607,
      "learning_rate": 1e-05,
      "loss": 0.4964,
      "step": 176
    },
    {
      "epoch": 0.04898352309862062,
      "grad_norm": 0.3937481641769409,
      "learning_rate": 1e-05,
      "loss": 0.4977,
      "step": 178
    },
    {
      "epoch": 0.04953389976265006,
      "grad_norm": 0.434950590133667,
      "learning_rate": 1e-05,
      "loss": 0.5054,
      "step": 180
    },
    {
      "epoch": 0.05008427642667951,
      "grad_norm": 0.40112894773483276,
      "learning_rate": 1e-05,
      "loss": 0.494,
      "step": 182
    },
    {
      "epoch": 0.050634653090708956,
      "grad_norm": 0.42001938819885254,
      "learning_rate": 1e-05,
      "loss": 0.4744,
      "step": 184
    },
    {
      "epoch": 0.0511850297547384,
      "grad_norm": 0.4066455364227295,
      "learning_rate": 1e-05,
      "loss": 0.4838,
      "step": 186
    },
    {
      "epoch": 0.051735406418767844,
      "grad_norm": 0.3934157192707062,
      "learning_rate": 1e-05,
      "loss": 0.5017,
      "step": 188
    },
    {
      "epoch": 0.05228578308279729,
      "grad_norm": 0.38877320289611816,
      "learning_rate": 1e-05,
      "loss": 0.5018,
      "step": 190
    },
    {
      "epoch": 0.05283615974682673,
      "grad_norm": 0.39771756529808044,
      "learning_rate": 1e-05,
      "loss": 0.485,
      "step": 192
    },
    {
      "epoch": 0.05338653641085618,
      "grad_norm": 0.3938674330711365,
      "learning_rate": 1e-05,
      "loss": 0.5034,
      "step": 194
    },
    {
      "epoch": 0.05393691307488562,
      "grad_norm": 0.40473559498786926,
      "learning_rate": 1e-05,
      "loss": 0.5082,
      "step": 196
    },
    {
      "epoch": 0.05448728973891507,
      "grad_norm": 0.3977149426937103,
      "learning_rate": 1e-05,
      "loss": 0.4997,
      "step": 198
    },
    {
      "epoch": 0.05503766640294452,
      "grad_norm": 0.39340054988861084,
      "learning_rate": 1e-05,
      "loss": 0.4859,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_merge_loss": 0.4414624571800232,
      "eval_merge_runtime": 600.1539,
      "eval_merge_samples_per_second": 56.239,
      "eval_merge_steps_per_second": 2.344,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_new_aug_datas_filtered.json_loss": 0.5691156983375549,
      "eval_new_aug_datas_filtered.json_runtime": 10.6767,
      "eval_new_aug_datas_filtered.json_samples_per_second": 71.839,
      "eval_new_aug_datas_filtered.json_steps_per_second": 2.997,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_sharegpt_gpt4.json_loss": 0.8223738670349121,
      "eval_sharegpt_gpt4.json_runtime": 31.6183,
      "eval_sharegpt_gpt4.json_samples_per_second": 58.858,
      "eval_sharegpt_gpt4.json_steps_per_second": 2.467,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_Table_GPT.json_loss": 0.09253557026386261,
      "eval_Table_GPT.json_runtime": 24.9748,
      "eval_Table_GPT.json_samples_per_second": 83.804,
      "eval_Table_GPT.json_steps_per_second": 3.524,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_gpt_4o_200k.json_loss": 0.849287211894989,
      "eval_gpt_4o_200k.json_runtime": 48.5339,
      "eval_gpt_4o_200k.json_samples_per_second": 129.415,
      "eval_gpt_4o_200k.json_steps_per_second": 5.398,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_multi_turn_datas.json_loss": 0.3907540738582611,
      "eval_multi_turn_datas.json_runtime": 75.6133,
      "eval_multi_turn_datas.json_samples_per_second": 52.927,
      "eval_multi_turn_datas.json_steps_per_second": 2.209,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_table_python_code_datas.json_loss": 0.33119720220565796,
      "eval_table_python_code_datas.json_runtime": 43.1313,
      "eval_table_python_code_datas.json_samples_per_second": 50.056,
      "eval_table_python_code_datas.json_steps_per_second": 2.087,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_tabular_llm_data.json_loss": 0.14601922035217285,
      "eval_tabular_llm_data.json_runtime": 8.7785,
      "eval_tabular_llm_data.json_samples_per_second": 28.023,
      "eval_tabular_llm_data.json_steps_per_second": 1.253,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_python_code_critic_21k.json_loss": 0.625038743019104,
      "eval_python_code_critic_21k.json_runtime": 3.237,
      "eval_python_code_critic_21k.json_samples_per_second": 184.43,
      "eval_python_code_critic_21k.json_steps_per_second": 7.723,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_all_merge_table_dataset.json_loss": 0.09772461652755737,
      "eval_all_merge_table_dataset.json_runtime": 24.3077,
      "eval_all_merge_table_dataset.json_samples_per_second": 29.291,
      "eval_all_merge_table_dataset.json_steps_per_second": 1.234,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_code_feedback_multi_turn.json_loss": 0.6093290448188782,
      "eval_code_feedback_multi_turn.json_runtime": 32.4589,
      "eval_code_feedback_multi_turn.json_samples_per_second": 67.809,
      "eval_code_feedback_multi_turn.json_steps_per_second": 2.834,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_ultrainteract_sft.json_loss": 0.4469935894012451,
      "eval_ultrainteract_sft.json_runtime": 8.6702,
      "eval_ultrainteract_sft.json_samples_per_second": 167.931,
      "eval_ultrainteract_sft.json_steps_per_second": 7.036,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_synthetic_text_to_sql.json_loss": 0.11159003525972366,
      "eval_synthetic_text_to_sql.json_runtime": 0.1306,
      "eval_synthetic_text_to_sql.json_samples_per_second": 260.355,
      "eval_synthetic_text_to_sql.json_steps_per_second": 15.315,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_sft_react_sql_datas.json_loss": 0.6847189664840698,
      "eval_sft_react_sql_datas.json_runtime": 7.8434,
      "eval_sft_react_sql_datas.json_samples_per_second": 40.034,
      "eval_sft_react_sql_datas.json_steps_per_second": 1.785,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_all_merge_code.json_loss": 0.32269543409347534,
      "eval_all_merge_code.json_runtime": 0.3287,
      "eval_all_merge_code.json_samples_per_second": 191.649,
      "eval_all_merge_code.json_steps_per_second": 9.126,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_magpie_datas.json_loss": 0.4600640833377838,
      "eval_magpie_datas.json_runtime": 2.2095,
      "eval_magpie_datas.json_samples_per_second": 77.844,
      "eval_magpie_datas.json_steps_per_second": 3.621,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_train_data_for_qwen.json_loss": 0.017207294702529907,
      "eval_train_data_for_qwen.json_runtime": 0.2494,
      "eval_train_data_for_qwen.json_samples_per_second": 40.095,
      "eval_train_data_for_qwen.json_steps_per_second": 4.01,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_alpaca_cleaned.json_loss": 0.9374485015869141,
      "eval_alpaca_cleaned.json_runtime": 0.1149,
      "eval_alpaca_cleaned.json_samples_per_second": 234.896,
      "eval_alpaca_cleaned.json_steps_per_second": 17.4,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_agent_instruct.json_loss": 0.23996739089488983,
      "eval_agent_instruct.json_runtime": 0.5126,
      "eval_agent_instruct.json_samples_per_second": 93.639,
      "eval_agent_instruct.json_steps_per_second": 3.902,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_MathInstruct.json_loss": 0.2269323617219925,
      "eval_MathInstruct.json_runtime": 0.3472,
      "eval_MathInstruct.json_samples_per_second": 164.184,
      "eval_MathInstruct.json_steps_per_second": 8.641,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_tested_143k_python_alpaca.json_loss": 0.4513254165649414,
      "eval_tested_143k_python_alpaca.json_runtime": 0.3017,
      "eval_tested_143k_python_alpaca.json_samples_per_second": 112.684,
      "eval_tested_143k_python_alpaca.json_steps_per_second": 6.628,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_xlam_function_calling_60k.json_loss": 0.011208846233785152,
      "eval_xlam_function_calling_60k.json_runtime": 0.1011,
      "eval_xlam_function_calling_60k.json_samples_per_second": 227.556,
      "eval_xlam_function_calling_60k.json_steps_per_second": 9.894,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_alpaca_data_gpt4_chinese.json_loss": 1.6813441514968872,
      "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0523,
      "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 306.208,
      "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.138,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_alpaca_gpt4_zh.json_loss": 1.0053786039352417,
      "eval_alpaca_gpt4_zh.json_runtime": 0.0504,
      "eval_alpaca_gpt4_zh.json_samples_per_second": 218.451,
      "eval_alpaca_gpt4_zh.json_steps_per_second": 19.859,
      "step": 200
    },
    {
      "epoch": 0.05503766640294452,
      "eval_codefeedback_filtered_instruction.json_loss": 0.5965134501457214,
      "eval_codefeedback_filtered_instruction.json_runtime": 0.4841,
      "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.316,
      "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.066,
      "step": 200
    },
    {
      "epoch": 0.05558804306697396,
      "grad_norm": 0.38687607645988464,
      "learning_rate": 1e-05,
      "loss": 0.49,
      "step": 202
    },
    {
      "epoch": 0.056138419731003406,
      "grad_norm": 0.39803430438041687,
      "learning_rate": 1e-05,
      "loss": 0.5047,
      "step": 204
    },
    {
      "epoch": 0.05668879639503285,
      "grad_norm": 0.41770851612091064,
      "learning_rate": 1e-05,
      "loss": 0.4874,
      "step": 206
    },
    {
      "epoch": 0.057239173059062294,
      "grad_norm": 0.3909968435764313,
      "learning_rate": 1e-05,
      "loss": 0.4992,
      "step": 208
    },
    {
      "epoch": 0.05778954972309174,
      "grad_norm": 0.3818782866001129,
      "learning_rate": 1e-05,
      "loss": 0.5006,
      "step": 210
    },
    {
      "epoch": 0.05833992638712118,
      "grad_norm": 0.4179542362689972,
      "learning_rate": 1e-05,
      "loss": 0.4945,
      "step": 212
    },
    {
      "epoch": 0.05889030305115063,
      "grad_norm": 0.3872973322868347,
      "learning_rate": 1e-05,
      "loss": 0.4918,
      "step": 214
    },
    {
      "epoch": 0.05944067971518008,
      "grad_norm": 0.4249219298362732,
      "learning_rate": 1e-05,
      "loss": 0.5039,
      "step": 216
    },
    {
      "epoch": 0.05999105637920952,
      "grad_norm": 0.43381986021995544,
      "learning_rate": 1e-05,
      "loss": 0.4873,
      "step": 218
    },
    {
      "epoch": 0.06054143304323897,
      "grad_norm": 0.40741005539894104,
      "learning_rate": 1e-05,
      "loss": 0.4771,
      "step": 220
    },
    {
      "epoch": 0.061091809707268414,
      "grad_norm": 0.37800464034080505,
      "learning_rate": 1e-05,
      "loss": 0.5015,
      "step": 222
    },
    {
      "epoch": 0.061642186371297855,
      "grad_norm": 0.42365899682044983,
      "learning_rate": 1e-05,
      "loss": 0.4906,
      "step": 224
    },
    {
      "epoch": 0.0621925630353273,
      "grad_norm": 0.39279666543006897,
      "learning_rate": 1e-05,
      "loss": 0.51,
      "step": 226
    },
    {
      "epoch": 0.06274293969935675,
      "grad_norm": 0.4037010073661804,
      "learning_rate": 1e-05,
      "loss": 0.5162,
      "step": 228
    },
    {
      "epoch": 0.0632933163633862,
      "grad_norm": 0.37650179862976074,
      "learning_rate": 1e-05,
      "loss": 0.4984,
      "step": 230
    },
    {
      "epoch": 0.06384369302741563,
      "grad_norm": 0.42879757285118103,
      "learning_rate": 1e-05,
      "loss": 0.492,
      "step": 232
    },
    {
      "epoch": 0.06439406969144508,
      "grad_norm": 0.42225000262260437,
      "learning_rate": 1e-05,
      "loss": 0.5215,
      "step": 234
    },
    {
      "epoch": 0.06494444635547453,
      "grad_norm": 0.3948579430580139,
      "learning_rate": 1e-05,
      "loss": 0.5045,
      "step": 236
    },
    {
      "epoch": 0.06549482301950398,
      "grad_norm": 0.40142592787742615,
      "learning_rate": 1e-05,
      "loss": 0.5083,
      "step": 238
    },
    {
      "epoch": 0.06604519968353342,
      "grad_norm": 0.41938111186027527,
      "learning_rate": 1e-05,
      "loss": 0.5094,
      "step": 240
    },
    {
      "epoch": 0.06659557634756286,
      "grad_norm": 0.4345923066139221,
      "learning_rate": 1e-05,
      "loss": 0.5076,
      "step": 242
    },
    {
      "epoch": 0.0671459530115923,
      "grad_norm": 0.3985568881034851,
      "learning_rate": 1e-05,
      "loss": 0.5007,
      "step": 244
    },
    {
      "epoch": 0.06769632967562175,
      "grad_norm": 0.37891215085983276,
      "learning_rate": 1e-05,
      "loss": 0.513,
      "step": 246
    },
    {
      "epoch": 0.0682467063396512,
      "grad_norm": 0.413566917181015,
      "learning_rate": 1e-05,
      "loss": 0.493,
      "step": 248
    },
    {
      "epoch": 0.06879708300368065,
      "grad_norm": 0.3980996608734131,
      "learning_rate": 1e-05,
      "loss": 0.5161,
      "step": 250
    },
    {
      "epoch": 0.0693474596677101,
      "grad_norm": 0.4525178372859955,
      "learning_rate": 1e-05,
      "loss": 0.5077,
      "step": 252
    },
    {
      "epoch": 0.06989783633173953,
      "grad_norm": 0.3720250427722931,
      "learning_rate": 1e-05,
      "loss": 0.4809,
      "step": 254
    },
    {
      "epoch": 0.07044821299576898,
      "grad_norm": 0.37366852164268494,
      "learning_rate": 1e-05,
      "loss": 0.4724,
      "step": 256
    },
    {
      "epoch": 0.07099858965979843,
      "grad_norm": 0.38189247250556946,
      "learning_rate": 1e-05,
      "loss": 0.5062,
      "step": 258
    },
    {
      "epoch": 0.07154896632382787,
      "grad_norm": 0.39108410477638245,
      "learning_rate": 1e-05,
      "loss": 0.4894,
      "step": 260
    },
    {
      "epoch": 0.07209934298785732,
      "grad_norm": 0.4071044921875,
      "learning_rate": 1e-05,
      "loss": 0.4916,
      "step": 262
    },
    {
      "epoch": 0.07264971965188675,
      "grad_norm": 0.38570597767829895,
      "learning_rate": 1e-05,
      "loss": 0.4925,
      "step": 264
    },
    {
      "epoch": 0.0732000963159162,
      "grad_norm": 0.409600168466568,
      "learning_rate": 1e-05,
      "loss": 0.4987,
      "step": 266
    },
    {
      "epoch": 0.07375047297994565,
      "grad_norm": 0.3844049274921417,
      "learning_rate": 1e-05,
      "loss": 0.5011,
      "step": 268
    },
    {
      "epoch": 0.0743008496439751,
      "grad_norm": 0.41260388493537903,
      "learning_rate": 1e-05,
      "loss": 0.5014,
      "step": 270
    },
    {
      "epoch": 0.07485122630800455,
      "grad_norm": 0.402567982673645,
      "learning_rate": 1e-05,
      "loss": 0.4926,
      "step": 272
    },
    {
      "epoch": 0.07540160297203398,
      "grad_norm": 0.4058002233505249,
      "learning_rate": 1e-05,
      "loss": 0.4879,
      "step": 274
    },
    {
      "epoch": 0.07595197963606343,
      "grad_norm": 0.42676812410354614,
      "learning_rate": 1e-05,
      "loss": 0.5073,
      "step": 276
    },
    {
      "epoch": 0.07650235630009287,
      "grad_norm": 0.3878956735134125,
      "learning_rate": 1e-05,
      "loss": 0.4831,
      "step": 278
    },
    {
      "epoch": 0.07705273296412232,
      "grad_norm": 0.37560945749282837,
      "learning_rate": 1e-05,
      "loss": 0.4705,
      "step": 280
    },
    {
      "epoch": 0.07760310962815177,
      "grad_norm": 0.4071865379810333,
      "learning_rate": 1e-05,
      "loss": 0.489,
      "step": 282
    },
    {
      "epoch": 0.07815348629218122,
      "grad_norm": 0.3832094073295593,
      "learning_rate": 1e-05,
      "loss": 0.4843,
      "step": 284
    },
    {
      "epoch": 0.07870386295621065,
      "grad_norm": 0.3808830976486206,
      "learning_rate": 1e-05,
      "loss": 0.5019,
      "step": 286
    },
    {
      "epoch": 0.0792542396202401,
      "grad_norm": 0.40182846784591675,
      "learning_rate": 1e-05,
      "loss": 0.4921,
      "step": 288
    },
    {
      "epoch": 0.07980461628426955,
      "grad_norm": 0.4483119249343872,
      "learning_rate": 1e-05,
      "loss": 0.5042,
      "step": 290
    },
    {
      "epoch": 0.080354992948299,
      "grad_norm": 0.3664950132369995,
      "learning_rate": 1e-05,
      "loss": 0.4758,
      "step": 292
    },
    {
      "epoch": 0.08090536961232844,
      "grad_norm": 0.39573603868484497,
      "learning_rate": 1e-05,
      "loss": 0.4945,
      "step": 294
    },
    {
      "epoch": 0.08145574627635788,
      "grad_norm": 0.44645532965660095,
      "learning_rate": 1e-05,
      "loss": 0.4964,
      "step": 296
    },
    {
      "epoch": 0.08200612294038732,
      "grad_norm": 0.39092323184013367,
      "learning_rate": 1e-05,
      "loss": 0.4947,
      "step": 298
    },
    {
      "epoch": 0.08255649960441677,
      "grad_norm": 0.41762229800224304,
      "learning_rate": 1e-05,
      "loss": 0.4949,
      "step": 300
    },
    {
      "epoch": 0.08310687626844622,
      "grad_norm": 0.39803358912467957,
      "learning_rate": 1e-05,
      "loss": 0.4822,
      "step": 302
    },
    {
      "epoch": 0.08365725293247567,
      "grad_norm": 0.39895498752593994,
      "learning_rate": 1e-05,
      "loss": 0.4893,
      "step": 304
    },
    {
      "epoch": 0.0842076295965051,
      "grad_norm": 0.3883228600025177,
      "learning_rate": 1e-05,
      "loss": 0.5062,
      "step": 306
    },
    {
      "epoch": 0.08475800626053455,
      "grad_norm": 0.4112294018268585,
      "learning_rate": 1e-05,
      "loss": 0.4979,
      "step": 308
    },
    {
      "epoch": 0.085308382924564,
      "grad_norm": 0.3851683437824249,
      "learning_rate": 1e-05,
      "loss": 0.4934,
      "step": 310
    },
    {
      "epoch": 0.08585875958859344,
      "grad_norm": 0.39728567004203796,
      "learning_rate": 1e-05,
      "loss": 0.4746,
      "step": 312
    },
    {
      "epoch": 0.08640913625262289,
      "grad_norm": 0.3943733274936676,
      "learning_rate": 1e-05,
      "loss": 0.4904,
      "step": 314
    },
    {
      "epoch": 0.08695951291665234,
      "grad_norm": 0.3954530656337738,
      "learning_rate": 1e-05,
      "loss": 0.4796,
      "step": 316
    },
    {
      "epoch": 0.08750988958068177,
      "grad_norm": 0.41237205266952515,
      "learning_rate": 1e-05,
      "loss": 0.4908,
      "step": 318
    },
    {
      "epoch": 0.08806026624471122,
      "grad_norm": 0.3923771381378174,
      "learning_rate": 1e-05,
      "loss": 0.4988,
      "step": 320
    },
    {
      "epoch": 0.08861064290874067,
      "grad_norm": 0.38542094826698303,
      "learning_rate": 1e-05,
      "loss": 0.5027,
      "step": 322
    },
    {
      "epoch": 0.08916101957277012,
      "grad_norm": 0.41598251461982727,
      "learning_rate": 1e-05,
      "loss": 0.4976,
      "step": 324
    },
    {
      "epoch": 0.08971139623679956,
      "grad_norm": 0.40826794505119324,
      "learning_rate": 1e-05,
      "loss": 0.4929,
      "step": 326
    },
    {
      "epoch": 0.090261772900829,
      "grad_norm": 0.39970022439956665,
      "learning_rate": 1e-05,
      "loss": 0.4946,
      "step": 328
    },
    {
      "epoch": 0.09081214956485845,
      "grad_norm": 0.3739086985588074,
      "learning_rate": 1e-05,
      "loss": 0.4678,
      "step": 330
    },
    {
      "epoch": 0.0913625262288879,
      "grad_norm": 0.3746420741081238,
      "learning_rate": 1e-05,
      "loss": 0.4757,
      "step": 332
    },
    {
      "epoch": 0.09191290289291734,
      "grad_norm": 0.3976924419403076,
      "learning_rate": 1e-05,
      "loss": 0.487,
      "step": 334
    },
    {
      "epoch": 0.09246327955694679,
      "grad_norm": 0.398971289396286,
      "learning_rate": 1e-05,
      "loss": 0.5077,
      "step": 336
    },
    {
      "epoch": 0.09301365622097624,
      "grad_norm": 0.3937431871891022,
      "learning_rate": 1e-05,
      "loss": 0.4885,
      "step": 338
    },
    {
      "epoch": 0.09356403288500567,
      "grad_norm": 0.395084410905838,
      "learning_rate": 1e-05,
      "loss": 0.4871,
      "step": 340
    },
    {
      "epoch": 0.09411440954903512,
      "grad_norm": 0.3677273690700531,
      "learning_rate": 1e-05,
      "loss": 0.4813,
      "step": 342
    },
    {
      "epoch": 0.09466478621306457,
      "grad_norm": 0.39645129442214966,
      "learning_rate": 1e-05,
      "loss": 0.4842,
      "step": 344
    },
    {
      "epoch": 0.09521516287709401,
      "grad_norm": 0.3642916679382324,
      "learning_rate": 1e-05,
      "loss": 0.504,
      "step": 346
    },
    {
      "epoch": 0.09576553954112346,
      "grad_norm": 0.40385907888412476,
      "learning_rate": 1e-05,
      "loss": 0.4933,
      "step": 348
    },
    {
      "epoch": 0.0963159162051529,
      "grad_norm": 0.39063799381256104,
      "learning_rate": 1e-05,
      "loss": 0.4856,
      "step": 350
    },
    {
      "epoch": 0.09686629286918234,
      "grad_norm": 0.38000059127807617,
      "learning_rate": 1e-05,
      "loss": 0.5001,
      "step": 352
    },
    {
      "epoch": 0.09741666953321179,
      "grad_norm": 0.39380577206611633,
      "learning_rate": 1e-05,
      "loss": 0.4961,
      "step": 354
    },
    {
      "epoch": 0.09796704619724124,
      "grad_norm": 0.39326363801956177,
      "learning_rate": 1e-05,
      "loss": 0.498,
      "step": 356
    },
    {
      "epoch": 0.09851742286127069,
      "grad_norm": 0.3775707185268402,
      "learning_rate": 1e-05,
      "loss": 0.4792,
      "step": 358
    },
    {
      "epoch": 0.09906779952530012,
      "grad_norm": 0.3770863115787506,
      "learning_rate": 1e-05,
      "loss": 0.4837,
      "step": 360
    },
    {
      "epoch": 0.09961817618932957,
      "grad_norm": 0.41484272480010986,
      "learning_rate": 1e-05,
      "loss": 0.4739,
      "step": 362
    },
    {
      "epoch": 0.10016855285335902,
      "grad_norm": 0.39758750796318054,
      "learning_rate": 1e-05,
      "loss": 0.4957,
      "step": 364
    },
    {
      "epoch": 0.10071892951738846,
      "grad_norm": 0.43485164642333984,
      "learning_rate": 1e-05,
      "loss": 0.492,
      "step": 366
    },
    {
      "epoch": 0.10126930618141791,
      "grad_norm": 0.40296798944473267,
      "learning_rate": 1e-05,
      "loss": 0.4977,
      "step": 368
    },
    {
      "epoch": 0.10181968284544736,
      "grad_norm": 0.3818409740924835,
      "learning_rate": 1e-05,
      "loss": 0.481,
      "step": 370
    },
    {
      "epoch": 0.1023700595094768,
      "grad_norm": 0.3949006199836731,
      "learning_rate": 1e-05,
      "loss": 0.5021,
      "step": 372
    },
    {
      "epoch": 0.10292043617350624,
      "grad_norm": 0.4327391982078552,
      "learning_rate": 1e-05,
      "loss": 0.5036,
      "step": 374
    },
    {
      "epoch": 0.10347081283753569,
      "grad_norm": 0.4008086025714874,
      "learning_rate": 1e-05,
      "loss": 0.4854,
      "step": 376
    },
    {
      "epoch": 0.10402118950156514,
      "grad_norm": 0.4146427810192108,
      "learning_rate": 1e-05,
      "loss": 0.4933,
      "step": 378
    },
    {
      "epoch": 0.10457156616559458,
      "grad_norm": 0.4073733389377594,
      "learning_rate": 1e-05,
      "loss": 0.4923,
      "step": 380
    },
    {
      "epoch": 0.10512194282962402,
      "grad_norm": 0.40570083260536194,
      "learning_rate": 1e-05,
      "loss": 0.4806,
      "step": 382
    },
    {
      "epoch": 0.10567231949365347,
      "grad_norm": 0.39516401290893555,
      "learning_rate": 1e-05,
      "loss": 0.5038,
      "step": 384
    },
    {
      "epoch": 0.10622269615768291,
      "grad_norm": 0.3886268138885498,
      "learning_rate": 1e-05,
      "loss": 0.4737,
      "step": 386
    },
    {
      "epoch": 0.10677307282171236,
      "grad_norm": 0.3846561014652252,
      "learning_rate": 1e-05,
      "loss": 0.4852,
      "step": 388
    },
    {
      "epoch": 0.10732344948574181,
      "grad_norm": 0.3952987492084503,
      "learning_rate": 1e-05,
      "loss": 0.496,
      "step": 390
    },
    {
      "epoch": 0.10787382614977124,
      "grad_norm": 0.3840448558330536,
      "learning_rate": 1e-05,
      "loss": 0.4976,
      "step": 392
    },
    {
      "epoch": 0.10842420281380069,
      "grad_norm": 0.38074344396591187,
      "learning_rate": 1e-05,
      "loss": 0.508,
      "step": 394
    },
    {
      "epoch": 0.10897457947783014,
      "grad_norm": 0.4216584861278534,
      "learning_rate": 1e-05,
      "loss": 0.4841,
      "step": 396
    },
    {
      "epoch": 0.10952495614185959,
      "grad_norm": 0.39932167530059814,
      "learning_rate": 1e-05,
      "loss": 0.4783,
      "step": 398
    },
    {
      "epoch": 0.11007533280588903,
      "grad_norm": 0.3687106966972351,
      "learning_rate": 1e-05,
      "loss": 0.4747,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_merge_loss": 0.42975950241088867,
      "eval_merge_runtime": 600.4283,
      "eval_merge_samples_per_second": 56.213,
      "eval_merge_steps_per_second": 2.343,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_new_aug_datas_filtered.json_loss": 0.558424174785614,
      "eval_new_aug_datas_filtered.json_runtime": 10.4015,
      "eval_new_aug_datas_filtered.json_samples_per_second": 73.74,
      "eval_new_aug_datas_filtered.json_steps_per_second": 3.076,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_sharegpt_gpt4.json_loss": 0.8120941519737244,
      "eval_sharegpt_gpt4.json_runtime": 31.6378,
      "eval_sharegpt_gpt4.json_samples_per_second": 58.822,
      "eval_sharegpt_gpt4.json_steps_per_second": 2.465,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_Table_GPT.json_loss": 0.08201506733894348,
      "eval_Table_GPT.json_runtime": 24.9859,
      "eval_Table_GPT.json_samples_per_second": 83.767,
      "eval_Table_GPT.json_steps_per_second": 3.522,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_gpt_4o_200k.json_loss": 0.8391836881637573,
      "eval_gpt_4o_200k.json_runtime": 48.456,
      "eval_gpt_4o_200k.json_samples_per_second": 129.623,
      "eval_gpt_4o_200k.json_steps_per_second": 5.407,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_multi_turn_datas.json_loss": 0.37471804022789,
      "eval_multi_turn_datas.json_runtime": 75.4526,
      "eval_multi_turn_datas.json_samples_per_second": 53.04,
      "eval_multi_turn_datas.json_steps_per_second": 2.213,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_table_python_code_datas.json_loss": 0.3102189898490906,
      "eval_table_python_code_datas.json_runtime": 42.9961,
      "eval_table_python_code_datas.json_samples_per_second": 50.214,
      "eval_table_python_code_datas.json_steps_per_second": 2.093,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_tabular_llm_data.json_loss": 0.16565443575382233,
      "eval_tabular_llm_data.json_runtime": 8.5095,
      "eval_tabular_llm_data.json_samples_per_second": 28.909,
      "eval_tabular_llm_data.json_steps_per_second": 1.293,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_python_code_critic_21k.json_loss": 0.6095640063285828,
      "eval_python_code_critic_21k.json_runtime": 3.2106,
      "eval_python_code_critic_21k.json_samples_per_second": 185.945,
      "eval_python_code_critic_21k.json_steps_per_second": 7.787,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_all_merge_table_dataset.json_loss": 0.09411227703094482,
      "eval_all_merge_table_dataset.json_runtime": 23.2459,
      "eval_all_merge_table_dataset.json_samples_per_second": 30.629,
      "eval_all_merge_table_dataset.json_steps_per_second": 1.291,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_code_feedback_multi_turn.json_loss": 0.6033111810684204,
      "eval_code_feedback_multi_turn.json_runtime": 32.3176,
      "eval_code_feedback_multi_turn.json_samples_per_second": 68.105,
      "eval_code_feedback_multi_turn.json_steps_per_second": 2.847,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_ultrainteract_sft.json_loss": 0.4417967200279236,
      "eval_ultrainteract_sft.json_runtime": 8.6225,
      "eval_ultrainteract_sft.json_samples_per_second": 168.86,
      "eval_ultrainteract_sft.json_steps_per_second": 7.075,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_synthetic_text_to_sql.json_loss": 0.10689640045166016,
      "eval_synthetic_text_to_sql.json_runtime": 0.1258,
      "eval_synthetic_text_to_sql.json_samples_per_second": 270.238,
      "eval_synthetic_text_to_sql.json_steps_per_second": 15.896,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_sft_react_sql_datas.json_loss": 0.6745051145553589,
      "eval_sft_react_sql_datas.json_runtime": 7.8354,
      "eval_sft_react_sql_datas.json_samples_per_second": 40.074,
      "eval_sft_react_sql_datas.json_steps_per_second": 1.787,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_all_merge_code.json_loss": 0.3035649061203003,
      "eval_all_merge_code.json_runtime": 0.3282,
      "eval_all_merge_code.json_samples_per_second": 191.936,
      "eval_all_merge_code.json_steps_per_second": 9.14,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_magpie_datas.json_loss": 0.4511661231517792,
      "eval_magpie_datas.json_runtime": 2.2095,
      "eval_magpie_datas.json_samples_per_second": 77.847,
      "eval_magpie_datas.json_steps_per_second": 3.621,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_train_data_for_qwen.json_loss": 0.012529651634395123,
      "eval_train_data_for_qwen.json_runtime": 0.2431,
      "eval_train_data_for_qwen.json_samples_per_second": 41.135,
      "eval_train_data_for_qwen.json_steps_per_second": 4.113,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_alpaca_cleaned.json_loss": 0.9377387166023254,
      "eval_alpaca_cleaned.json_runtime": 0.115,
      "eval_alpaca_cleaned.json_samples_per_second": 234.777,
      "eval_alpaca_cleaned.json_steps_per_second": 17.391,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_agent_instruct.json_loss": 0.2363067865371704,
      "eval_agent_instruct.json_runtime": 0.5121,
      "eval_agent_instruct.json_samples_per_second": 93.728,
      "eval_agent_instruct.json_steps_per_second": 3.905,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_MathInstruct.json_loss": 0.21367128193378448,
      "eval_MathInstruct.json_runtime": 0.3647,
      "eval_MathInstruct.json_samples_per_second": 156.291,
      "eval_MathInstruct.json_steps_per_second": 8.226,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_tested_143k_python_alpaca.json_loss": 0.4474259316921234,
      "eval_tested_143k_python_alpaca.json_runtime": 0.3019,
      "eval_tested_143k_python_alpaca.json_samples_per_second": 112.631,
      "eval_tested_143k_python_alpaca.json_steps_per_second": 6.625,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_xlam_function_calling_60k.json_loss": 0.011296543292701244,
      "eval_xlam_function_calling_60k.json_runtime": 0.1,
      "eval_xlam_function_calling_60k.json_samples_per_second": 230.102,
      "eval_xlam_function_calling_60k.json_steps_per_second": 10.004,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_alpaca_data_gpt4_chinese.json_loss": 1.645748496055603,
      "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0515,
      "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.905,
      "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.432,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_alpaca_gpt4_zh.json_loss": 0.9886136651039124,
      "eval_alpaca_gpt4_zh.json_runtime": 0.0503,
      "eval_alpaca_gpt4_zh.json_samples_per_second": 218.827,
      "eval_alpaca_gpt4_zh.json_steps_per_second": 19.893,
      "step": 400
    },
    {
      "epoch": 0.11007533280588903,
      "eval_codefeedback_filtered_instruction.json_loss": 0.5969922542572021,
      "eval_codefeedback_filtered_instruction.json_runtime": 0.4851,
      "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.226,
      "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.061,
      "step": 400
    },
{
|
|
"epoch": 0.11062570946991848,
|
|
"grad_norm": 0.40487441420555115,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4811,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.11117608613394792,
|
|
"grad_norm": 0.39143064618110657,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4704,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.11172646279797736,
|
|
"grad_norm": 0.46816787123680115,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4941,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.11227683946200681,
|
|
"grad_norm": 0.37707188725471497,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4839,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.11282721612603626,
|
|
"grad_norm": 0.3780951201915741,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4889,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.1133775927900657,
|
|
"grad_norm": 0.36941519379615784,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4747,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.11392796945409514,
|
|
"grad_norm": 0.39626002311706543,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4872,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.11447834611812459,
|
|
"grad_norm": 0.38315075635910034,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.471,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.11502872278215404,
|
|
"grad_norm": 0.37200862169265747,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4891,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.11557909944618348,
|
|
"grad_norm": 0.39199399948120117,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4807,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.11612947611021293,
|
|
"grad_norm": 0.37726107239723206,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4834,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.11667985277424237,
|
|
"grad_norm": 0.38188016414642334,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4853,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.11723022943827181,
|
|
"grad_norm": 0.39772850275039673,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4895,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.11778060610230126,
|
|
"grad_norm": 0.3797503411769867,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4818,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.11833098276633071,
|
|
"grad_norm": 0.39962416887283325,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4802,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.11888135943036016,
|
|
"grad_norm": 0.37405237555503845,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4879,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.1194317360943896,
|
|
"grad_norm": 0.39297720789909363,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4853,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.11998211275841904,
|
|
"grad_norm": 0.3871022164821625,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4845,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.12053248942244849,
|
|
"grad_norm": 0.43845734000205994,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4865,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.12108286608647793,
|
|
"grad_norm": 0.3888757526874542,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4862,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.12163324275050738,
|
|
"grad_norm": 0.3801029920578003,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4751,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.12218361941453683,
|
|
"grad_norm": 0.3861992657184601,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.5026,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.12273399607856626,
|
|
"grad_norm": 0.40307343006134033,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4901,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.12328437274259571,
|
|
"grad_norm": 0.36803606152534485,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4927,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.12383474940662516,
|
|
"grad_norm": 0.40266790986061096,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4663,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.1243851260706546,
|
|
"grad_norm": 0.3870522975921631,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.475,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.12493550273468405,
|
|
"grad_norm": 0.3978688716888428,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4979,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.1254858793987135,
|
|
"grad_norm": 0.3799881935119629,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4802,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.12603625606274294,
|
|
"grad_norm": 0.3795452415943146,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4878,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.1265866327267724,
|
|
"grad_norm": 0.3865358233451843,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4825,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.12713700939080183,
|
|
"grad_norm": 0.3646644353866577,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4725,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.12768738605483126,
|
|
"grad_norm": 0.3851023018360138,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4849,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.12823776271886073,
|
|
"grad_norm": 0.37587490677833557,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4729,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.12878813938289016,
|
|
"grad_norm": 0.3559257686138153,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4826,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.12933851604691962,
|
|
"grad_norm": 0.3967975974082947,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4917,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.12988889271094906,
|
|
"grad_norm": 0.4064919650554657,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.5018,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.1304392693749785,
|
|
"grad_norm": 0.3609434962272644,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4805,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.13098964603900795,
|
|
"grad_norm": 0.4229820668697357,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4756,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.13154002270303738,
|
|
"grad_norm": 0.3882080018520355,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4946,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.13209039936706685,
|
|
"grad_norm": 0.37811529636383057,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.495,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.13264077603109628,
|
|
"grad_norm": 0.4139231741428375,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4722,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.13319115269512571,
|
|
"grad_norm": 0.3836536705493927,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4795,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.13374152935915518,
|
|
"grad_norm": 0.39434006810188293,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4783,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.1342919060231846,
|
|
"grad_norm": 0.3847144544124603,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4751,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.13484228268721407,
|
|
"grad_norm": 0.4081107974052429,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4947,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.1353926593512435,
|
|
"grad_norm": 0.3780671954154968,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4932,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.13594303601527294,
|
|
"grad_norm": 0.39522022008895874,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4868,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.1364934126793024,
|
|
"grad_norm": 0.3978594243526459,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4895,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.13704378934333183,
|
|
"grad_norm": 0.40067028999328613,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4841,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.1375941660073613,
|
|
"grad_norm": 0.38525891304016113,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4769,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.13814454267139073,
|
|
"grad_norm": 0.3708615303039551,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4787,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.1386949193354202,
|
|
"grad_norm": 0.3583269417285919,
"learning_rate": 1e-05,
"loss": 0.4905,
"step": 504
},
{
"epoch": 0.13924529599944963,
"grad_norm": 0.4004143178462982,
"learning_rate": 1e-05,
"loss": 0.4797,
"step": 506
},
{
"epoch": 0.13979567266347906,
"grad_norm": 0.3877711594104767,
"learning_rate": 1e-05,
"loss": 0.4968,
"step": 508
},
{
"epoch": 0.14034604932750852,
"grad_norm": 0.394502729177475,
"learning_rate": 1e-05,
"loss": 0.4743,
"step": 510
},
{
"epoch": 0.14089642599153795,
"grad_norm": 0.3829086720943451,
"learning_rate": 1e-05,
"loss": 0.4769,
"step": 512
},
{
"epoch": 0.14144680265556742,
"grad_norm": 0.3849917948246002,
"learning_rate": 1e-05,
"loss": 0.4763,
"step": 514
},
{
"epoch": 0.14199717931959685,
"grad_norm": 0.40810078382492065,
"learning_rate": 1e-05,
"loss": 0.4904,
"step": 516
},
{
"epoch": 0.14254755598362628,
"grad_norm": 0.3982490599155426,
"learning_rate": 1e-05,
"loss": 0.4762,
"step": 518
},
{
"epoch": 0.14309793264765575,
"grad_norm": 0.36841145157814026,
"learning_rate": 1e-05,
"loss": 0.4745,
"step": 520
},
{
"epoch": 0.14364830931168518,
"grad_norm": 0.3805830180644989,
"learning_rate": 1e-05,
"loss": 0.4811,
"step": 522
},
{
"epoch": 0.14419868597571464,
"grad_norm": 0.40074169635772705,
"learning_rate": 1e-05,
"loss": 0.4923,
"step": 524
},
{
"epoch": 0.14474906263974407,
"grad_norm": 0.42140403389930725,
"learning_rate": 1e-05,
"loss": 0.4972,
"step": 526
},
{
"epoch": 0.1452994393037735,
"grad_norm": 0.38489535450935364,
"learning_rate": 1e-05,
"loss": 0.4921,
"step": 528
},
{
"epoch": 0.14584981596780297,
"grad_norm": 0.38449668884277344,
"learning_rate": 1e-05,
"loss": 0.4883,
"step": 530
},
{
"epoch": 0.1464001926318324,
"grad_norm": 0.38009950518608093,
"learning_rate": 1e-05,
"loss": 0.4808,
"step": 532
},
{
"epoch": 0.14695056929586187,
"grad_norm": 0.3916541337966919,
"learning_rate": 1e-05,
"loss": 0.4708,
"step": 534
},
{
"epoch": 0.1475009459598913,
"grad_norm": 0.39856135845184326,
"learning_rate": 1e-05,
"loss": 0.4933,
"step": 536
},
{
"epoch": 0.14805132262392073,
"grad_norm": 0.3804597556591034,
"learning_rate": 1e-05,
"loss": 0.4772,
"step": 538
},
{
"epoch": 0.1486016992879502,
"grad_norm": 0.39584964513778687,
"learning_rate": 1e-05,
"loss": 0.4746,
"step": 540
},
{
"epoch": 0.14915207595197963,
"grad_norm": 0.36922863125801086,
"learning_rate": 1e-05,
"loss": 0.4911,
"step": 542
},
{
"epoch": 0.1497024526160091,
"grad_norm": 0.38762298226356506,
"learning_rate": 1e-05,
"loss": 0.4744,
"step": 544
},
{
"epoch": 0.15025282928003852,
"grad_norm": 0.38803887367248535,
"learning_rate": 1e-05,
"loss": 0.4776,
"step": 546
},
{
"epoch": 0.15080320594406796,
"grad_norm": 0.39409226179122925,
"learning_rate": 1e-05,
"loss": 0.4789,
"step": 548
},
{
"epoch": 0.15135358260809742,
"grad_norm": 0.4141768217086792,
"learning_rate": 1e-05,
"loss": 0.4752,
"step": 550
},
{
"epoch": 0.15190395927212685,
"grad_norm": 0.3770216703414917,
"learning_rate": 1e-05,
"loss": 0.4689,
"step": 552
},
{
"epoch": 0.15245433593615632,
"grad_norm": 0.3929697573184967,
"learning_rate": 1e-05,
"loss": 0.4861,
"step": 554
},
{
"epoch": 0.15300471260018575,
"grad_norm": 0.3859105706214905,
"learning_rate": 1e-05,
"loss": 0.4799,
"step": 556
},
{
"epoch": 0.1535550892642152,
"grad_norm": 0.41044744849205017,
"learning_rate": 1e-05,
"loss": 0.4911,
"step": 558
},
{
"epoch": 0.15410546592824464,
"grad_norm": 0.36859771609306335,
"learning_rate": 1e-05,
"loss": 0.4653,
"step": 560
},
{
"epoch": 0.15465584259227408,
"grad_norm": 0.39258813858032227,
"learning_rate": 1e-05,
"loss": 0.4769,
"step": 562
},
{
"epoch": 0.15520621925630354,
"grad_norm": 0.38241100311279297,
"learning_rate": 1e-05,
"loss": 0.4821,
"step": 564
},
{
"epoch": 0.15575659592033297,
"grad_norm": 0.4107513427734375,
"learning_rate": 1e-05,
"loss": 0.4746,
"step": 566
},
{
"epoch": 0.15630697258436244,
"grad_norm": 0.3872488737106323,
"learning_rate": 1e-05,
"loss": 0.4817,
"step": 568
},
{
"epoch": 0.15685734924839187,
"grad_norm": 0.3712390065193176,
"learning_rate": 1e-05,
"loss": 0.4944,
"step": 570
},
{
"epoch": 0.1574077259124213,
"grad_norm": 0.413503497838974,
"learning_rate": 1e-05,
"loss": 0.4774,
"step": 572
},
{
"epoch": 0.15795810257645077,
"grad_norm": 0.35706543922424316,
"learning_rate": 1e-05,
"loss": 0.4743,
"step": 574
},
{
"epoch": 0.1585084792404802,
"grad_norm": 0.39815768599510193,
"learning_rate": 1e-05,
"loss": 0.4846,
"step": 576
},
{
"epoch": 0.15905885590450966,
"grad_norm": 0.38346678018569946,
"learning_rate": 1e-05,
"loss": 0.4633,
"step": 578
},
{
"epoch": 0.1596092325685391,
"grad_norm": 0.3905611038208008,
"learning_rate": 1e-05,
"loss": 0.4776,
"step": 580
},
{
"epoch": 0.16015960923256853,
"grad_norm": 0.3790382742881775,
"learning_rate": 1e-05,
"loss": 0.4892,
"step": 582
},
{
"epoch": 0.160709985896598,
"grad_norm": 0.37033775448799133,
"learning_rate": 1e-05,
"loss": 0.4848,
"step": 584
},
{
"epoch": 0.16126036256062742,
"grad_norm": 0.3686079680919647,
"learning_rate": 1e-05,
"loss": 0.4514,
"step": 586
},
{
"epoch": 0.16181073922465689,
"grad_norm": 0.3836509883403778,
"learning_rate": 1e-05,
"loss": 0.4859,
"step": 588
},
{
"epoch": 0.16236111588868632,
"grad_norm": 0.40387076139450073,
"learning_rate": 1e-05,
"loss": 0.485,
"step": 590
},
{
"epoch": 0.16291149255271575,
"grad_norm": 0.3850373327732086,
"learning_rate": 1e-05,
"loss": 0.4843,
"step": 592
},
{
"epoch": 0.16346186921674521,
"grad_norm": 0.3814505934715271,
"learning_rate": 1e-05,
"loss": 0.4749,
"step": 594
},
{
"epoch": 0.16401224588077465,
"grad_norm": 0.35501739382743835,
"learning_rate": 1e-05,
"loss": 0.4645,
"step": 596
},
{
"epoch": 0.1645626225448041,
"grad_norm": 0.34997090697288513,
"learning_rate": 1e-05,
"loss": 0.4687,
"step": 598
},
{
"epoch": 0.16511299920883354,
"grad_norm": 0.365212619304657,
"learning_rate": 1e-05,
"loss": 0.4956,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_merge_loss": 0.4216049909591675,
"eval_merge_runtime": 600.0688,
"eval_merge_samples_per_second": 56.247,
"eval_merge_steps_per_second": 2.345,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_new_aug_datas_filtered.json_loss": 0.5500591397285461,
"eval_new_aug_datas_filtered.json_runtime": 10.5295,
"eval_new_aug_datas_filtered.json_samples_per_second": 72.843,
"eval_new_aug_datas_filtered.json_steps_per_second": 3.039,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_sharegpt_gpt4.json_loss": 0.8046284914016724,
"eval_sharegpt_gpt4.json_runtime": 31.7366,
"eval_sharegpt_gpt4.json_samples_per_second": 58.639,
"eval_sharegpt_gpt4.json_steps_per_second": 2.458,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_Table_GPT.json_loss": 0.07981107383966446,
"eval_Table_GPT.json_runtime": 25.0085,
"eval_Table_GPT.json_samples_per_second": 83.691,
"eval_Table_GPT.json_steps_per_second": 3.519,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_gpt_4o_200k.json_loss": 0.8323716521263123,
"eval_gpt_4o_200k.json_runtime": 48.5988,
"eval_gpt_4o_200k.json_samples_per_second": 129.242,
"eval_gpt_4o_200k.json_steps_per_second": 5.391,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_multi_turn_datas.json_loss": 0.36492469906806946,
"eval_multi_turn_datas.json_runtime": 75.8696,
"eval_multi_turn_datas.json_samples_per_second": 52.748,
"eval_multi_turn_datas.json_steps_per_second": 2.201,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_table_python_code_datas.json_loss": 0.29984766244888306,
"eval_table_python_code_datas.json_runtime": 43.1945,
"eval_table_python_code_datas.json_samples_per_second": 49.983,
"eval_table_python_code_datas.json_steps_per_second": 2.084,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_tabular_llm_data.json_loss": 0.13250145316123962,
"eval_tabular_llm_data.json_runtime": 8.5476,
"eval_tabular_llm_data.json_samples_per_second": 28.78,
"eval_tabular_llm_data.json_steps_per_second": 1.287,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_python_code_critic_21k.json_loss": 0.599878191947937,
"eval_python_code_critic_21k.json_runtime": 3.2358,
"eval_python_code_critic_21k.json_samples_per_second": 184.496,
"eval_python_code_critic_21k.json_steps_per_second": 7.726,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_all_merge_table_dataset.json_loss": 0.08627181500196457,
"eval_all_merge_table_dataset.json_runtime": 23.3808,
"eval_all_merge_table_dataset.json_samples_per_second": 30.452,
"eval_all_merge_table_dataset.json_steps_per_second": 1.283,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_code_feedback_multi_turn.json_loss": 0.5982062220573425,
"eval_code_feedback_multi_turn.json_runtime": 32.4617,
"eval_code_feedback_multi_turn.json_samples_per_second": 67.803,
"eval_code_feedback_multi_turn.json_steps_per_second": 2.834,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_ultrainteract_sft.json_loss": 0.4367137849330902,
"eval_ultrainteract_sft.json_runtime": 8.672,
"eval_ultrainteract_sft.json_samples_per_second": 167.896,
"eval_ultrainteract_sft.json_steps_per_second": 7.034,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_synthetic_text_to_sql.json_loss": 0.1079096570611,
"eval_synthetic_text_to_sql.json_runtime": 0.1265,
"eval_synthetic_text_to_sql.json_samples_per_second": 268.769,
"eval_synthetic_text_to_sql.json_steps_per_second": 15.81,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_sft_react_sql_datas.json_loss": 0.6610473990440369,
"eval_sft_react_sql_datas.json_runtime": 7.8536,
"eval_sft_react_sql_datas.json_samples_per_second": 39.982,
"eval_sft_react_sql_datas.json_steps_per_second": 1.783,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_all_merge_code.json_loss": 0.3021065890789032,
"eval_all_merge_code.json_runtime": 0.3377,
"eval_all_merge_code.json_samples_per_second": 186.571,
"eval_all_merge_code.json_steps_per_second": 8.884,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_magpie_datas.json_loss": 0.4455747604370117,
"eval_magpie_datas.json_runtime": 2.2122,
"eval_magpie_datas.json_samples_per_second": 77.751,
"eval_magpie_datas.json_steps_per_second": 3.616,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_train_data_for_qwen.json_loss": 0.009937227703630924,
"eval_train_data_for_qwen.json_runtime": 0.2454,
"eval_train_data_for_qwen.json_samples_per_second": 40.745,
"eval_train_data_for_qwen.json_steps_per_second": 4.075,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_alpaca_cleaned.json_loss": 0.9349167943000793,
"eval_alpaca_cleaned.json_runtime": 0.1148,
"eval_alpaca_cleaned.json_samples_per_second": 235.251,
"eval_alpaca_cleaned.json_steps_per_second": 17.426,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_agent_instruct.json_loss": 0.2310038059949875,
"eval_agent_instruct.json_runtime": 0.5119,
"eval_agent_instruct.json_samples_per_second": 93.766,
"eval_agent_instruct.json_steps_per_second": 3.907,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_MathInstruct.json_loss": 0.21358835697174072,
"eval_MathInstruct.json_runtime": 0.3581,
"eval_MathInstruct.json_samples_per_second": 159.182,
"eval_MathInstruct.json_steps_per_second": 8.378,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_tested_143k_python_alpaca.json_loss": 0.4455429017543793,
"eval_tested_143k_python_alpaca.json_runtime": 0.3013,
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.849,
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.638,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_xlam_function_calling_60k.json_loss": 0.00893339328467846,
"eval_xlam_function_calling_60k.json_runtime": 0.1004,
"eval_xlam_function_calling_60k.json_samples_per_second": 228.974,
"eval_xlam_function_calling_60k.json_steps_per_second": 9.955,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_alpaca_data_gpt4_chinese.json_loss": 1.6295539140701294,
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0512,
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 312.613,
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.538,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_alpaca_gpt4_zh.json_loss": 0.9761592745780945,
"eval_alpaca_gpt4_zh.json_runtime": 0.0499,
"eval_alpaca_gpt4_zh.json_samples_per_second": 220.289,
"eval_alpaca_gpt4_zh.json_steps_per_second": 20.026,
"step": 600
},
{
"epoch": 0.16511299920883354,
"eval_codefeedback_filtered_instruction.json_loss": 0.5956905484199524,
"eval_codefeedback_filtered_instruction.json_runtime": 0.4851,
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.232,
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.062,
"step": 600
},
{
"epoch": 0.16566337587286298,
"grad_norm": 0.38436150550842285,
"learning_rate": 1e-05,
"loss": 0.4609,
"step": 602
},
{
"epoch": 0.16621375253689244,
"grad_norm": 0.3946292996406555,
"learning_rate": 1e-05,
"loss": 0.4699,
"step": 604
},
{
"epoch": 0.16676412920092187,
"grad_norm": 0.4069615304470062,
"learning_rate": 1e-05,
"loss": 0.4722,
"step": 606
},
{
"epoch": 0.16731450586495134,
"grad_norm": 0.371660977602005,
"learning_rate": 1e-05,
"loss": 0.4856,
"step": 608
},
{
"epoch": 0.16786488252898077,
"grad_norm": 0.394911527633667,
"learning_rate": 1e-05,
"loss": 0.4804,
"step": 610
},
{
"epoch": 0.1684152591930102,
"grad_norm": 0.4873884916305542,
"learning_rate": 1e-05,
"loss": 0.4686,
"step": 612
},
{
"epoch": 0.16896563585703966,
"grad_norm": 0.3943842649459839,
"learning_rate": 1e-05,
"loss": 0.4887,
"step": 614
},
{
"epoch": 0.1695160125210691,
"grad_norm": 0.3716658055782318,
"learning_rate": 1e-05,
"loss": 0.4898,
"step": 616
},
{
"epoch": 0.17006638918509856,
"grad_norm": 0.36271047592163086,
"learning_rate": 1e-05,
"loss": 0.4861,
"step": 618
},
{
"epoch": 0.170616765849128,
"grad_norm": 0.3833015263080597,
"learning_rate": 1e-05,
"loss": 0.4814,
"step": 620
},
{
"epoch": 0.17116714251315746,
"grad_norm": 0.3661365211009979,
"learning_rate": 1e-05,
"loss": 0.4873,
"step": 622
},
{
"epoch": 0.1717175191771869,
"grad_norm": 0.3613869845867157,
"learning_rate": 1e-05,
"loss": 0.4537,
"step": 624
},
{
"epoch": 0.17226789584121632,
"grad_norm": 0.34498724341392517,
"learning_rate": 1e-05,
"loss": 0.483,
"step": 626
},
{
"epoch": 0.17281827250524578,
"grad_norm": 0.41466256976127625,
"learning_rate": 1e-05,
"loss": 0.4765,
"step": 628
},
{
"epoch": 0.17336864916927522,
"grad_norm": 0.36220455169677734,
"learning_rate": 1e-05,
"loss": 0.4842,
"step": 630
},
{
"epoch": 0.17391902583330468,
"grad_norm": 0.38009753823280334,
"learning_rate": 1e-05,
"loss": 0.482,
"step": 632
},
{
"epoch": 0.17446940249733411,
"grad_norm": 0.3589475452899933,
"learning_rate": 1e-05,
"loss": 0.4714,
"step": 634
},
{
"epoch": 0.17501977916136355,
"grad_norm": 0.37625178694725037,
"learning_rate": 1e-05,
"loss": 0.4487,
"step": 636
},
{
"epoch": 0.175570155825393,
"grad_norm": 0.3818652331829071,
"learning_rate": 1e-05,
"loss": 0.4757,
"step": 638
},
{
"epoch": 0.17612053248942244,
"grad_norm": 0.39498913288116455,
"learning_rate": 1e-05,
"loss": 0.4879,
"step": 640
},
{
"epoch": 0.1766709091534519,
"grad_norm": 0.3864663243293762,
"learning_rate": 1e-05,
"loss": 0.4815,
"step": 642
},
{
"epoch": 0.17722128581748134,
"grad_norm": 0.37452608346939087,
"learning_rate": 1e-05,
"loss": 0.4773,
"step": 644
},
{
"epoch": 0.17777166248151077,
"grad_norm": 0.3754761219024658,
"learning_rate": 1e-05,
"loss": 0.4916,
"step": 646
},
{
"epoch": 0.17832203914554023,
"grad_norm": 0.3797055780887604,
"learning_rate": 1e-05,
"loss": 0.4663,
"step": 648
},
{
"epoch": 0.17887241580956967,
"grad_norm": 0.3640367090702057,
"learning_rate": 1e-05,
"loss": 0.4737,
"step": 650
},
{
"epoch": 0.17942279247359913,
"grad_norm": 0.35961100459098816,
"learning_rate": 1e-05,
"loss": 0.4757,
"step": 652
},
{
"epoch": 0.17997316913762856,
"grad_norm": 0.40443646907806396,
"learning_rate": 1e-05,
"loss": 0.4789,
"step": 654
},
{
"epoch": 0.180523545801658,
"grad_norm": 0.35993334650993347,
"learning_rate": 1e-05,
"loss": 0.4902,
"step": 656
},
{
"epoch": 0.18107392246568746,
"grad_norm": 0.3933318853378296,
"learning_rate": 1e-05,
"loss": 0.4726,
"step": 658
},
{
"epoch": 0.1816242991297169,
"grad_norm": 0.3923085033893585,
"learning_rate": 1e-05,
"loss": 0.4714,
"step": 660
},
{
"epoch": 0.18217467579374635,
"grad_norm": 0.37387627363204956,
"learning_rate": 1e-05,
"loss": 0.478,
"step": 662
},
{
"epoch": 0.1827250524577758,
"grad_norm": 0.3787866532802582,
"learning_rate": 1e-05,
"loss": 0.4849,
"step": 664
},
{
"epoch": 0.18327542912180522,
"grad_norm": 0.39361730217933655,
"learning_rate": 1e-05,
"loss": 0.4836,
"step": 666
},
{
"epoch": 0.18382580578583468,
"grad_norm": 0.37430262565612793,
"learning_rate": 1e-05,
"loss": 0.4876,
"step": 668
},
{
"epoch": 0.18437618244986412,
"grad_norm": 0.3914833068847656,
"learning_rate": 1e-05,
"loss": 0.48,
"step": 670
},
{
"epoch": 0.18492655911389358,
"grad_norm": 0.36528506875038147,
"learning_rate": 1e-05,
"loss": 0.4583,
"step": 672
},
{
"epoch": 0.185476935777923,
"grad_norm": 0.3779620826244354,
"learning_rate": 1e-05,
"loss": 0.483,
"step": 674
},
{
"epoch": 0.18602731244195247,
"grad_norm": 0.3712228834629059,
"learning_rate": 1e-05,
"loss": 0.4833,
"step": 676
},
{
"epoch": 0.1865776891059819,
"grad_norm": 0.3959150016307831,
"learning_rate": 1e-05,
"loss": 0.4678,
"step": 678
},
{
"epoch": 0.18712806577001134,
"grad_norm": 0.38113903999328613,
"learning_rate": 1e-05,
"loss": 0.4794,
"step": 680
},
{
"epoch": 0.1876784424340408,
"grad_norm": 0.3872113525867462,
"learning_rate": 1e-05,
"loss": 0.4627,
"step": 682
},
{
"epoch": 0.18822881909807024,
"grad_norm": 0.35678407549858093,
"learning_rate": 1e-05,
"loss": 0.4666,
"step": 684
},
{
"epoch": 0.1887791957620997,
"grad_norm": 0.37833312153816223,
"learning_rate": 1e-05,
"loss": 0.4734,
"step": 686
},
{
"epoch": 0.18932957242612913,
"grad_norm": 0.3900817930698395,
"learning_rate": 1e-05,
"loss": 0.4834,
"step": 688
},
{
"epoch": 0.18987994909015857,
"grad_norm": 0.37114864587783813,
"learning_rate": 1e-05,
"loss": 0.4682,
"step": 690
},
{
"epoch": 0.19043032575418803,
"grad_norm": 0.37264662981033325,
"learning_rate": 1e-05,
"loss": 0.4815,
"step": 692
},
{
"epoch": 0.19098070241821746,
"grad_norm": 0.3758707344532013,
"learning_rate": 1e-05,
"loss": 0.4847,
"step": 694
},
{
"epoch": 0.19153107908224692,
"grad_norm": 0.38832512497901917,
"learning_rate": 1e-05,
"loss": 0.486,
"step": 696
},
{
"epoch": 0.19208145574627636,
"grad_norm": 0.382926344871521,
"learning_rate": 1e-05,
"loss": 0.4844,
"step": 698
},
{
"epoch": 0.1926318324103058,
"grad_norm": 0.3953557312488556,
"learning_rate": 1e-05,
"loss": 0.472,
"step": 700
},
{
"epoch": 0.19318220907433525,
"grad_norm": 0.36295419931411743,
"learning_rate": 1e-05,
"loss": 0.4792,
"step": 702
},
{
"epoch": 0.1937325857383647,
"grad_norm": 0.35859328508377075,
"learning_rate": 1e-05,
"loss": 0.4665,
"step": 704
},
{
"epoch": 0.19428296240239415,
"grad_norm": 0.3658142685890198,
"learning_rate": 1e-05,
"loss": 0.4724,
"step": 706
},
{
"epoch": 0.19483333906642358,
"grad_norm": 0.3860156834125519,
"learning_rate": 1e-05,
"loss": 0.4803,
"step": 708
},
{
"epoch": 0.19538371573045302,
"grad_norm": 0.38030922412872314,
"learning_rate": 1e-05,
"loss": 0.4692,
"step": 710
},
{
"epoch": 0.19593409239448248,
"grad_norm": 0.417516827583313,
"learning_rate": 1e-05,
"loss": 0.4833,
"step": 712
},
{
"epoch": 0.1964844690585119,
"grad_norm": 0.39626750349998474,
"learning_rate": 1e-05,
"loss": 0.4808,
"step": 714
},
{
"epoch": 0.19703484572254137,
"grad_norm": 0.3886042535305023,
"learning_rate": 1e-05,
"loss": 0.4716,
"step": 716
},
{
"epoch": 0.1975852223865708,
"grad_norm": 0.3816077411174774,
"learning_rate": 1e-05,
"loss": 0.468,
"step": 718
},
{
"epoch": 0.19813559905060024,
"grad_norm": 0.39385372400283813,
"learning_rate": 1e-05,
"loss": 0.4671,
"step": 720
},
{
"epoch": 0.1986859757146297,
"grad_norm": 0.35457953810691833,
"learning_rate": 1e-05,
"loss": 0.4667,
"step": 722
},
{
"epoch": 0.19923635237865914,
"grad_norm": 0.39437657594680786,
"learning_rate": 1e-05,
"loss": 0.4637,
"step": 724
},
{
"epoch": 0.1997867290426886,
"grad_norm": 0.41132184863090515,
"learning_rate": 1e-05,
"loss": 0.4723,
"step": 726
},
{
"epoch": 0.20033710570671803,
"grad_norm": 0.3640534281730652,
"learning_rate": 1e-05,
"loss": 0.4623,
"step": 728
},
{
"epoch": 0.20088748237074747,
"grad_norm": 0.39893659949302673,
"learning_rate": 1e-05,
"loss": 0.4881,
"step": 730
},
{
"epoch": 0.20143785903477693,
"grad_norm": 0.3677632212638855,
"learning_rate": 1e-05,
"loss": 0.4572,
"step": 732
},
{
"epoch": 0.20198823569880636,
"grad_norm": 0.40594953298568726,
"learning_rate": 1e-05,
"loss": 0.4726,
"step": 734
},
{
"epoch": 0.20253861236283582,
"grad_norm": 0.39571645855903625,
"learning_rate": 1e-05,
"loss": 0.4751,
"step": 736
},
{
"epoch": 0.20308898902686526,
"grad_norm": 0.3569906949996948,
"learning_rate": 1e-05,
"loss": 0.4855,
"step": 738
},
{
"epoch": 0.20363936569089472,
"grad_norm": 0.39166778326034546,
"learning_rate": 1e-05,
"loss": 0.4864,
"step": 740
},
{
"epoch": 0.20418974235492415,
"grad_norm": 0.36861687898635864,
"learning_rate": 1e-05,
"loss": 0.4659,
"step": 742
},
{
"epoch": 0.2047401190189536,
"grad_norm": 0.3691236078739166,
"learning_rate": 1e-05,
"loss": 0.4688,
"step": 744
},
{
"epoch": 0.20529049568298305,
"grad_norm": 0.41912853717803955,
"learning_rate": 1e-05,
"loss": 0.4787,
"step": 746
},
{
"epoch": 0.20584087234701248,
"grad_norm": 0.4022221565246582,
"learning_rate": 1e-05,
"loss": 0.4758,
"step": 748
},
{
"epoch": 0.20639124901104194,
"grad_norm": 0.402567595243454,
"learning_rate": 1e-05,
"loss": 0.4766,
"step": 750
},
{
"epoch": 0.20694162567507138,
"grad_norm": 0.3741600811481476,
"learning_rate": 1e-05,
"loss": 0.4833,
"step": 752
},
{
"epoch": 0.2074920023391008,
"grad_norm": 0.3958164155483246,
"learning_rate": 1e-05,
"loss": 0.4786,
"step": 754
},
{
"epoch": 0.20804237900313027,
"grad_norm": 0.37908801436424255,
"learning_rate": 1e-05,
"loss": 0.4715,
"step": 756
},
{
"epoch": 0.2085927556671597,
"grad_norm": 0.38426473736763,
"learning_rate": 1e-05,
"loss": 0.4874,
"step": 758
},
{
"epoch": 0.20914313233118917,
"grad_norm": 0.3873310983181,
"learning_rate": 1e-05,
"loss": 0.4677,
"step": 760
},
{
"epoch": 0.2096935089952186,
"grad_norm": 0.4033788740634918,
"learning_rate": 1e-05,
"loss": 0.4815,
"step": 762
},
{
"epoch": 0.21024388565924804,
"grad_norm": 0.40875962376594543,
"learning_rate": 1e-05,
"loss": 0.4879,
"step": 764
},
{
"epoch": 0.2107942623232775,
"grad_norm": 0.38724496960639954,
"learning_rate": 1e-05,
"loss": 0.4646,
"step": 766
},
{
"epoch": 0.21134463898730693,
"grad_norm": 0.39307013154029846,
"learning_rate": 1e-05,
"loss": 0.4762,
"step": 768
},
{
"epoch": 0.2118950156513364,
"grad_norm": 0.37346333265304565,
"learning_rate": 1e-05,
"loss": 0.4775,
"step": 770
},
{
"epoch": 0.21244539231536583,
"grad_norm": 0.3753449618816376,
"learning_rate": 1e-05,
"loss": 0.4707,
"step": 772
},
{
"epoch": 0.21299576897939526,
"grad_norm": 0.3829357922077179,
"learning_rate": 1e-05,
"loss": 0.4825,
"step": 774
},
{
"epoch": 0.21354614564342472,
"grad_norm": 0.35514822602272034,
"learning_rate": 1e-05,
"loss": 0.4779,
"step": 776
},
{
"epoch": 0.21409652230745416,
"grad_norm": 0.3714098036289215,
"learning_rate": 1e-05,
"loss": 0.4598,
"step": 778
},
{
"epoch": 0.21464689897148362,
"grad_norm": 0.3754241168498993,
"learning_rate": 1e-05,
"loss": 0.4736,
"step": 780
},
{
"epoch": 0.21519727563551305,
"grad_norm": 0.36637604236602783,
"learning_rate": 1e-05,
"loss": 0.4652,
"step": 782
},
{
"epoch": 0.2157476522995425,
"grad_norm": 0.367357075214386,
"learning_rate": 1e-05,
"loss": 0.466,
"step": 784
},
{
"epoch": 0.21629802896357195,
"grad_norm": 0.3747154176235199,
"learning_rate": 1e-05,
"loss": 0.4668,
"step": 786
},
{
"epoch": 0.21684840562760138,
"grad_norm": 0.3824009895324707,
"learning_rate": 1e-05,
"loss": 0.4748,
"step": 788
},
{
"epoch": 0.21739878229163084,
"grad_norm": 0.385030061006546,
"learning_rate": 1e-05,
"loss": 0.4882,
"step": 790
},
{
"epoch": 0.21794915895566028,
"grad_norm": 0.35460343956947327,
"learning_rate": 1e-05,
"loss": 0.4664,
"step": 792
},
{
"epoch": 0.21849953561968974,
"grad_norm": 0.3792308270931244,
"learning_rate": 1e-05,
"loss": 0.4874,
"step": 794
},
{
"epoch": 0.21904991228371917,
"grad_norm": 0.37190011143684387,
"learning_rate": 1e-05,
"loss": 0.4838,
"step": 796
},
{
"epoch": 0.2196002889477486,
"grad_norm": 0.3757864832878113,
"learning_rate": 1e-05,
"loss": 0.4538,
"step": 798
},
{
"epoch": 0.22015066561177807,
"grad_norm": 0.3677947223186493,
"learning_rate": 1e-05,
"loss": 0.4605,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_merge_loss": 0.4150216579437256,
"eval_merge_runtime": 600.1194,
"eval_merge_samples_per_second": 56.242,
"eval_merge_steps_per_second": 2.345,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_new_aug_datas_filtered.json_loss": 0.5434484481811523,
"eval_new_aug_datas_filtered.json_runtime": 10.4424,
"eval_new_aug_datas_filtered.json_samples_per_second": 73.45,
"eval_new_aug_datas_filtered.json_steps_per_second": 3.064,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_sharegpt_gpt4.json_loss": 0.7981637716293335,
"eval_sharegpt_gpt4.json_runtime": 31.6015,
"eval_sharegpt_gpt4.json_samples_per_second": 58.89,
"eval_sharegpt_gpt4.json_steps_per_second": 2.468,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_Table_GPT.json_loss": 0.0783885195851326,
"eval_Table_GPT.json_runtime": 24.9448,
"eval_Table_GPT.json_samples_per_second": 83.905,
"eval_Table_GPT.json_steps_per_second": 3.528,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_gpt_4o_200k.json_loss": 0.8245088458061218,
"eval_gpt_4o_200k.json_runtime": 48.4135,
"eval_gpt_4o_200k.json_samples_per_second": 129.737,
"eval_gpt_4o_200k.json_steps_per_second": 5.412,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_multi_turn_datas.json_loss": 0.35650402307510376,
"eval_multi_turn_datas.json_runtime": 75.5012,
"eval_multi_turn_datas.json_samples_per_second": 53.006,
"eval_multi_turn_datas.json_steps_per_second": 2.212,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_table_python_code_datas.json_loss": 0.2912423610687256,
"eval_table_python_code_datas.json_runtime": 43.0138,
"eval_table_python_code_datas.json_samples_per_second": 50.193,
"eval_table_python_code_datas.json_steps_per_second": 2.092,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_tabular_llm_data.json_loss": 0.11931464821100235,
"eval_tabular_llm_data.json_runtime": 8.524,
"eval_tabular_llm_data.json_samples_per_second": 28.86,
"eval_tabular_llm_data.json_steps_per_second": 1.29,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_python_code_critic_21k.json_loss": 0.5899094343185425,
"eval_python_code_critic_21k.json_runtime": 3.2108,
"eval_python_code_critic_21k.json_samples_per_second": 185.935,
"eval_python_code_critic_21k.json_steps_per_second": 7.786,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_all_merge_table_dataset.json_loss": 0.08210163563489914,
"eval_all_merge_table_dataset.json_runtime": 23.2334,
"eval_all_merge_table_dataset.json_samples_per_second": 30.646,
"eval_all_merge_table_dataset.json_steps_per_second": 1.291,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_code_feedback_multi_turn.json_loss": 0.5942392349243164,
"eval_code_feedback_multi_turn.json_runtime": 32.3672,
"eval_code_feedback_multi_turn.json_samples_per_second": 68.001,
"eval_code_feedback_multi_turn.json_steps_per_second": 2.842,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_ultrainteract_sft.json_loss": 0.43230774998664856,
"eval_ultrainteract_sft.json_runtime": 8.6469,
"eval_ultrainteract_sft.json_samples_per_second": 168.384,
"eval_ultrainteract_sft.json_steps_per_second": 7.055,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_synthetic_text_to_sql.json_loss": 0.10562511533498764,
"eval_synthetic_text_to_sql.json_runtime": 0.1256,
"eval_synthetic_text_to_sql.json_samples_per_second": 270.776,
"eval_synthetic_text_to_sql.json_steps_per_second": 15.928,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_sft_react_sql_datas.json_loss": 0.6536443829536438,
"eval_sft_react_sql_datas.json_runtime": 7.8424,
"eval_sft_react_sql_datas.json_samples_per_second": 40.039,
"eval_sft_react_sql_datas.json_steps_per_second": 1.785,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_all_merge_code.json_loss": 0.2989647090435028,
"eval_all_merge_code.json_runtime": 0.3335,
"eval_all_merge_code.json_samples_per_second": 188.9,
"eval_all_merge_code.json_steps_per_second": 8.995,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_magpie_datas.json_loss": 0.4389919340610504,
"eval_magpie_datas.json_runtime": 2.209,
"eval_magpie_datas.json_samples_per_second": 77.862,
"eval_magpie_datas.json_steps_per_second": 3.621,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_train_data_for_qwen.json_loss": 0.0057810284197330475,
"eval_train_data_for_qwen.json_runtime": 0.2434,
"eval_train_data_for_qwen.json_samples_per_second": 41.087,
"eval_train_data_for_qwen.json_steps_per_second": 4.109,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_alpaca_cleaned.json_loss": 0.9368440508842468,
"eval_alpaca_cleaned.json_runtime": 0.1149,
"eval_alpaca_cleaned.json_samples_per_second": 234.893,
"eval_alpaca_cleaned.json_steps_per_second": 17.399,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_agent_instruct.json_loss": 0.2261410802602768,
"eval_agent_instruct.json_runtime": 0.5137,
"eval_agent_instruct.json_samples_per_second": 93.432,
"eval_agent_instruct.json_steps_per_second": 3.893,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_MathInstruct.json_loss": 0.208473339676857,
"eval_MathInstruct.json_runtime": 0.3639,
"eval_MathInstruct.json_samples_per_second": 156.645,
"eval_MathInstruct.json_steps_per_second": 8.244,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_tested_143k_python_alpaca.json_loss": 0.44293999671936035,
"eval_tested_143k_python_alpaca.json_runtime": 0.2997,
"eval_tested_143k_python_alpaca.json_samples_per_second": 113.456,
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.674,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_xlam_function_calling_60k.json_loss": 0.010015022940933704,
"eval_xlam_function_calling_60k.json_runtime": 0.1001,
"eval_xlam_function_calling_60k.json_samples_per_second": 229.814,
"eval_xlam_function_calling_60k.json_steps_per_second": 9.992,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_alpaca_data_gpt4_chinese.json_loss": 1.591582179069519,
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0515,
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.705,
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.419,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_alpaca_gpt4_zh.json_loss": 0.9911380410194397,
"eval_alpaca_gpt4_zh.json_runtime": 0.0498,
"eval_alpaca_gpt4_zh.json_samples_per_second": 221.019,
"eval_alpaca_gpt4_zh.json_steps_per_second": 20.093,
"step": 800
},
{
"epoch": 0.22015066561177807,
"eval_codefeedback_filtered_instruction.json_loss": 0.5947377681732178,
"eval_codefeedback_filtered_instruction.json_runtime": 0.485,
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.237,
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.062,
"step": 800
},
{
"epoch": 0.2207010422758075,
"grad_norm": 0.36098968982696533,
"learning_rate": 1e-05,
"loss": 0.4646,
"step": 802
},
{
"epoch": 0.22125141893983696,
"grad_norm": 0.3653786778450012,
"learning_rate": 1e-05,
"loss": 0.4501,
"step": 804
},
{
"epoch": 0.2218017956038664,
"grad_norm": 0.36442849040031433,
"learning_rate": 1e-05,
"loss": 0.4686,
"step": 806
},
{
"epoch": 0.22235217226789583,
"grad_norm": 0.3782612383365631,
"learning_rate": 1e-05,
"loss": 0.4598,
"step": 808
},
{
"epoch": 0.2229025489319253,
"grad_norm": 0.39521896839141846,
"learning_rate": 1e-05,
"loss": 0.4679,
"step": 810
},
{
"epoch": 0.22345292559595473,
"grad_norm": 0.3727470636367798,
"learning_rate": 1e-05,
"loss": 0.4803,
"step": 812
},
{
"epoch": 0.2240033022599842,
"grad_norm": 0.3883068263530731,
"learning_rate": 1e-05,
"loss": 0.4773,
"step": 814
},
{
"epoch": 0.22455367892401362,
"grad_norm": 0.37147605419158936,
"learning_rate": 1e-05,
"loss": 0.4825,
"step": 816
},
{
"epoch": 0.22510405558804306,
"grad_norm": 0.3924333155155182,
"learning_rate": 1e-05,
"loss": 0.4698,
"step": 818
},
{
"epoch": 0.22565443225207252,
"grad_norm": 0.38133057951927185,
"learning_rate": 1e-05,
"loss": 0.4842,
"step": 820
},
{
"epoch": 0.22620480891610195,
"grad_norm": 0.36132821440696716,
"learning_rate": 1e-05,
"loss": 0.4594,
"step": 822
},
{
"epoch": 0.2267551855801314,
"grad_norm": 0.39988580346107483,
"learning_rate": 1e-05,
"loss": 0.4795,
"step": 824
},
{
"epoch": 0.22730556224416085,
"grad_norm": 0.38140830397605896,
"learning_rate": 1e-05,
"loss": 0.4649,
"step": 826
},
{
"epoch": 0.22785593890819028,
"grad_norm": 0.3726978898048401,
"learning_rate": 1e-05,
"loss": 0.4603,
"step": 828
},
{
"epoch": 0.22840631557221974,
"grad_norm": 0.3880995512008667,
"learning_rate": 1e-05,
"loss": 0.4739,
"step": 830
},
{
"epoch": 0.22895669223624918,
"grad_norm": 0.4118787944316864,
"learning_rate": 1e-05,
"loss": 0.4733,
"step": 832
},
{
"epoch": 0.22950706890027864,
"grad_norm": 0.37878745794296265,
"learning_rate": 1e-05,
"loss": 0.4922,
"step": 834
},
{
"epoch": 0.23005744556430807,
"grad_norm": 0.3838474154472351,
"learning_rate": 1e-05,
"loss": 0.4646,
"step": 836
},
{
"epoch": 0.2306078222283375,
"grad_norm": 0.37345945835113525,
"learning_rate": 1e-05,
"loss": 0.4798,
"step": 838
},
{
"epoch": 0.23115819889236697,
"grad_norm": 0.36341801285743713,
"learning_rate": 1e-05,
"loss": 0.4773,
"step": 840
},
{
"epoch": 0.2317085755563964,
"grad_norm": 0.38800522685050964,
"learning_rate": 1e-05,
"loss": 0.4806,
"step": 842
},
{
"epoch": 0.23225895222042586,
"grad_norm": 0.38882526755332947,
"learning_rate": 1e-05,
"loss": 0.4765,
"step": 844
},
{
"epoch": 0.2328093288844553,
"grad_norm": 0.37744489312171936,
"learning_rate": 1e-05,
"loss": 0.4842,
"step": 846
},
{
"epoch": 0.23335970554848473,
"grad_norm": 0.39916718006134033,
"learning_rate": 1e-05,
"loss": 0.467,
"step": 848
},
{
"epoch": 0.2339100822125142,
"grad_norm": 0.36556801199913025,
"learning_rate": 1e-05,
"loss": 0.4711,
"step": 850
},
{
"epoch": 0.23446045887654363,
"grad_norm": 0.3993853032588959,
"learning_rate": 1e-05,
"loss": 0.4656,
"step": 852
},
{
"epoch": 0.2350108355405731,
"grad_norm": 0.39630356431007385,
"learning_rate": 1e-05,
"loss": 0.4734,
"step": 854
},
{
"epoch": 0.23556121220460252,
"grad_norm": 0.3797578513622284,
"learning_rate": 1e-05,
"loss": 0.4718,
"step": 856
},
{
"epoch": 0.23611158886863198,
"grad_norm": 0.38648873567581177,
"learning_rate": 1e-05,
"loss": 0.4751,
"step": 858
},
{
"epoch": 0.23666196553266142,
"grad_norm": 0.3934420347213745,
"learning_rate": 1e-05,
"loss": 0.4653,
"step": 860
},
{
"epoch": 0.23721234219669085,
"grad_norm": 0.3899431824684143,
"learning_rate": 1e-05,
"loss": 0.4644,
"step": 862
},
{
"epoch": 0.2377627188607203,
"grad_norm": 0.3696826696395874,
"learning_rate": 1e-05,
"loss": 0.482,
"step": 864
},
{
"epoch": 0.23831309552474975,
"grad_norm": 0.352923184633255,
"learning_rate": 1e-05,
"loss": 0.4707,
"step": 866
},
{
"epoch": 0.2388634721887792,
"grad_norm": 0.36678972840309143,
"learning_rate": 1e-05,
"loss": 0.4687,
"step": 868
},
{
"epoch": 0.23941384885280864,
"grad_norm": 0.38986021280288696,
"learning_rate": 1e-05,
"loss": 0.4613,
"step": 870
},
{
"epoch": 0.23996422551683808,
"grad_norm": 0.3684535622596741,
"learning_rate": 1e-05,
"loss": 0.4734,
"step": 872
},
{
"epoch": 0.24051460218086754,
"grad_norm": 0.36672261357307434,
"learning_rate": 1e-05,
"loss": 0.4796,
"step": 874
},
{
"epoch": 0.24106497884489697,
"grad_norm": 0.39910420775413513,
"learning_rate": 1e-05,
"loss": 0.4681,
"step": 876
},
{
"epoch": 0.24161535550892643,
"grad_norm": 0.38694077730178833,
"learning_rate": 1e-05,
"loss": 0.4821,
"step": 878
},
{
"epoch": 0.24216573217295587,
"grad_norm": 0.4555080831050873,
"learning_rate": 1e-05,
"loss": 0.4882,
"step": 880
},
{
"epoch": 0.2427161088369853,
"grad_norm": 0.3934450149536133,
"learning_rate": 1e-05,
"loss": 0.4778,
"step": 882
},
{
"epoch": 0.24326648550101476,
"grad_norm": 0.35743412375450134,
"learning_rate": 1e-05,
"loss": 0.4793,
"step": 884
},
{
"epoch": 0.2438168621650442,
"grad_norm": 0.3518178462982178,
"learning_rate": 1e-05,
"loss": 0.472,
"step": 886
},
{
"epoch": 0.24436723882907366,
"grad_norm": 0.35367751121520996,
"learning_rate": 1e-05,
"loss": 0.4747,
"step": 888
},
{
"epoch": 0.2449176154931031,
"grad_norm": 0.3810805678367615,
"learning_rate": 1e-05,
"loss": 0.4834,
"step": 890
},
{
"epoch": 0.24546799215713253,
"grad_norm": 0.38103243708610535,
"learning_rate": 1e-05,
"loss": 0.4763,
"step": 892
},
{
"epoch": 0.246018368821162,
"grad_norm": 0.3839399218559265,
"learning_rate": 1e-05,
"loss": 0.4696,
"step": 894
},
{
"epoch": 0.24656874548519142,
"grad_norm": 0.41292649507522583,
"learning_rate": 1e-05,
"loss": 0.4777,
"step": 896
},
{
"epoch": 0.24711912214922088,
"grad_norm": 0.36179229617118835,
"learning_rate": 1e-05,
"loss": 0.4668,
"step": 898
},
{
"epoch": 0.24766949881325032,
"grad_norm": 0.3638279139995575,
"learning_rate": 1e-05,
"loss": 0.4645,
"step": 900
},
{
"epoch": 0.24821987547727975,
"grad_norm": 0.3458470106124878,
"learning_rate": 1e-05,
"loss": 0.4746,
"step": 902
},
{
"epoch": 0.2487702521413092,
"grad_norm": 0.3822806775569916,
"learning_rate": 1e-05,
"loss": 0.4715,
"step": 904
},
{
"epoch": 0.24932062880533865,
"grad_norm": 0.3655596077442169,
"learning_rate": 1e-05,
"loss": 0.4659,
"step": 906
},
{
"epoch": 0.2498710054693681,
"grad_norm": 0.3868783116340637,
"learning_rate": 1e-05,
"loss": 0.4743,
"step": 908
},
{
"epoch": 0.25042138213339754,
"grad_norm": 0.3778232932090759,
"learning_rate": 1e-05,
"loss": 0.4652,
"step": 910
},
{
"epoch": 0.250971758797427,
"grad_norm": 0.36664894223213196,
"learning_rate": 1e-05,
"loss": 0.4554,
"step": 912
},
{
"epoch": 0.2515221354614564,
"grad_norm": 0.3995139002799988,
"learning_rate": 1e-05,
"loss": 0.4683,
"step": 914
},
{
"epoch": 0.25207251212548587,
"grad_norm": 0.40083470940589905,
"learning_rate": 1e-05,
"loss": 0.4673,
"step": 916
},
{
"epoch": 0.25262288878951533,
"grad_norm": 0.37919968366622925,
"learning_rate": 1e-05,
"loss": 0.4776,
"step": 918
},
{
"epoch": 0.2531732654535448,
"grad_norm": 0.3586704432964325,
"learning_rate": 1e-05,
"loss": 0.4792,
"step": 920
},
{
"epoch": 0.2537236421175742,
"grad_norm": 0.3744722902774811,
"learning_rate": 1e-05,
"loss": 0.463,
"step": 922
},
{
"epoch": 0.25427401878160366,
"grad_norm": 0.37209680676460266,
"learning_rate": 1e-05,
"loss": 0.4805,
"step": 924
},
{
"epoch": 0.2548243954456331,
"grad_norm": 0.40809133648872375,
"learning_rate": 1e-05,
"loss": 0.4781,
"step": 926
},
{
"epoch": 0.25537477210966253,
"grad_norm": 0.37261903285980225,
"learning_rate": 1e-05,
"loss": 0.4617,
"step": 928
},
{
"epoch": 0.255925148773692,
"grad_norm": 0.37391313910484314,
"learning_rate": 1e-05,
"loss": 0.4617,
"step": 930
},
{
"epoch": 0.25647552543772145,
"grad_norm": 0.36610838770866394,
"learning_rate": 1e-05,
"loss": 0.4642,
"step": 932
},
{
"epoch": 0.25702590210175086,
"grad_norm": 0.3854142129421234,
"learning_rate": 1e-05,
"loss": 0.4652,
"step": 934
},
{
"epoch": 0.2575762787657803,
"grad_norm": 0.365159809589386,
"learning_rate": 1e-05,
"loss": 0.4714,
"step": 936
},
{
"epoch": 0.2581266554298098,
"grad_norm": 0.41678836941719055,
"learning_rate": 1e-05,
"loss": 0.4854,
"step": 938
},
{
"epoch": 0.25867703209383924,
"grad_norm": 0.380215585231781,
"learning_rate": 1e-05,
"loss": 0.4785,
"step": 940
},
{
"epoch": 0.25922740875786865,
"grad_norm": 0.3704361617565155,
"learning_rate": 1e-05,
"loss": 0.4433,
"step": 942
},
{
"epoch": 0.2597777854218981,
"grad_norm": 0.34440556168556213,
"learning_rate": 1e-05,
"loss": 0.4642,
"step": 944
},
{
"epoch": 0.2603281620859276,
"grad_norm": 0.36701446771621704,
"learning_rate": 1e-05,
"loss": 0.4533,
"step": 946
},
{
"epoch": 0.260878538749957,
"grad_norm": 0.3694971799850464,
"learning_rate": 1e-05,
"loss": 0.4942,
"step": 948
},
{
"epoch": 0.26142891541398644,
"grad_norm": 0.3697713017463684,
"learning_rate": 1e-05,
"loss": 0.4586,
"step": 950
},
{
"epoch": 0.2619792920780159,
"grad_norm": 0.36559173464775085,
"learning_rate": 1e-05,
"loss": 0.4679,
"step": 952
},
{
"epoch": 0.26252966874204536,
"grad_norm": 0.3704969584941864,
"learning_rate": 1e-05,
"loss": 0.4624,
"step": 954
},
{
"epoch": 0.26308004540607477,
"grad_norm": 0.3804495334625244,
"learning_rate": 1e-05,
"loss": 0.4603,
"step": 956
},
{
"epoch": 0.26363042207010423,
"grad_norm": 0.34987303614616394,
"learning_rate": 1e-05,
"loss": 0.4679,
"step": 958
},
{
"epoch": 0.2641807987341337,
"grad_norm": 0.3723856508731842,
"learning_rate": 1e-05,
"loss": 0.4631,
"step": 960
},
{
"epoch": 0.2647311753981631,
"grad_norm": 0.35623612999916077,
"learning_rate": 1e-05,
"loss": 0.4627,
"step": 962
},
{
"epoch": 0.26528155206219256,
"grad_norm": 0.37969711422920227,
"learning_rate": 1e-05,
"loss": 0.4815,
"step": 964
},
{
"epoch": 0.265831928726222,
"grad_norm": 0.3889734447002411,
"learning_rate": 1e-05,
"loss": 0.471,
"step": 966
},
{
"epoch": 0.26638230539025143,
"grad_norm": 0.39106228947639465,
"learning_rate": 1e-05,
"loss": 0.4542,
"step": 968
},
{
"epoch": 0.2669326820542809,
"grad_norm": 0.38163650035858154,
"learning_rate": 1e-05,
"loss": 0.4604,
"step": 970
},
{
"epoch": 0.26748305871831035,
"grad_norm": 0.3733852505683899,
"learning_rate": 1e-05,
"loss": 0.4768,
"step": 972
},
{
"epoch": 0.2680334353823398,
"grad_norm": 0.3894038796424866,
"learning_rate": 1e-05,
"loss": 0.4816,
"step": 974
},
{
"epoch": 0.2685838120463692,
"grad_norm": 0.3697439432144165,
"learning_rate": 1e-05,
"loss": 0.4731,
"step": 976
},
{
"epoch": 0.2691341887103987,
"grad_norm": 0.39549171924591064,
"learning_rate": 1e-05,
"loss": 0.473,
"step": 978
},
{
"epoch": 0.26968456537442814,
"grad_norm": 0.38712403178215027,
"learning_rate": 1e-05,
"loss": 0.4717,
"step": 980
},
{
"epoch": 0.27023494203845755,
"grad_norm": 0.3775619864463806,
"learning_rate": 1e-05,
"loss": 0.4638,
"step": 982
},
{
"epoch": 0.270785318702487,
"grad_norm": 0.38664135336875916,
"learning_rate": 1e-05,
"loss": 0.4655,
"step": 984
},
{
"epoch": 0.27133569536651647,
"grad_norm": 0.3730804920196533,
"learning_rate": 1e-05,
"loss": 0.4672,
"step": 986
},
{
"epoch": 0.2718860720305459,
"grad_norm": 0.36626750230789185,
"learning_rate": 1e-05,
"loss": 0.4562,
"step": 988
},
{
"epoch": 0.27243644869457534,
"grad_norm": 0.38708406686782837,
"learning_rate": 1e-05,
"loss": 0.4583,
"step": 990
},
{
"epoch": 0.2729868253586048,
"grad_norm": 0.37348565459251404,
"learning_rate": 1e-05,
"loss": 0.4709,
"step": 992
},
{
"epoch": 0.27353720202263426,
"grad_norm": 0.39145755767822266,
"learning_rate": 1e-05,
"loss": 0.4667,
"step": 994
},
{
"epoch": 0.27408757868666367,
"grad_norm": 0.3615020215511322,
"learning_rate": 1e-05,
"loss": 0.4585,
"step": 996
},
{
"epoch": 0.27463795535069313,
"grad_norm": 0.38545548915863037,
"learning_rate": 1e-05,
"loss": 0.472,
"step": 998
},
{
"epoch": 0.2751883320147226,
"grad_norm": 0.3605005741119385,
"learning_rate": 1e-05,
"loss": 0.4575,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_merge_loss": 0.4092504382133484,
"eval_merge_runtime": 599.649,
"eval_merge_samples_per_second": 56.286,
"eval_merge_steps_per_second": 2.346,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_new_aug_datas_filtered.json_loss": 0.534787118434906,
"eval_new_aug_datas_filtered.json_runtime": 10.3465,
"eval_new_aug_datas_filtered.json_samples_per_second": 74.131,
"eval_new_aug_datas_filtered.json_steps_per_second": 3.093,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_sharegpt_gpt4.json_loss": 0.7911589741706848,
"eval_sharegpt_gpt4.json_runtime": 31.721,
"eval_sharegpt_gpt4.json_samples_per_second": 58.668,
"eval_sharegpt_gpt4.json_steps_per_second": 2.459,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_Table_GPT.json_loss": 0.07000603526830673,
"eval_Table_GPT.json_runtime": 24.9973,
"eval_Table_GPT.json_samples_per_second": 83.729,
"eval_Table_GPT.json_steps_per_second": 3.52,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_gpt_4o_200k.json_loss": 0.8180866837501526,
"eval_gpt_4o_200k.json_runtime": 48.5388,
"eval_gpt_4o_200k.json_samples_per_second": 129.402,
"eval_gpt_4o_200k.json_steps_per_second": 5.398,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_multi_turn_datas.json_loss": 0.34955134987831116,
"eval_multi_turn_datas.json_runtime": 75.86,
"eval_multi_turn_datas.json_samples_per_second": 52.755,
"eval_multi_turn_datas.json_steps_per_second": 2.201,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_table_python_code_datas.json_loss": 0.285086989402771,
"eval_table_python_code_datas.json_runtime": 43.1585,
"eval_table_python_code_datas.json_samples_per_second": 50.025,
"eval_table_python_code_datas.json_steps_per_second": 2.085,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_tabular_llm_data.json_loss": 0.12198314070701599,
"eval_tabular_llm_data.json_runtime": 8.5654,
"eval_tabular_llm_data.json_samples_per_second": 28.72,
"eval_tabular_llm_data.json_steps_per_second": 1.284,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_python_code_critic_21k.json_loss": 0.5841899514198303,
"eval_python_code_critic_21k.json_runtime": 3.2248,
"eval_python_code_critic_21k.json_samples_per_second": 185.125,
"eval_python_code_critic_21k.json_steps_per_second": 7.752,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_all_merge_table_dataset.json_loss": 0.08110550791025162,
"eval_all_merge_table_dataset.json_runtime": 23.4122,
"eval_all_merge_table_dataset.json_samples_per_second": 30.411,
"eval_all_merge_table_dataset.json_steps_per_second": 1.281,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_code_feedback_multi_turn.json_loss": 0.5908513069152832,
"eval_code_feedback_multi_turn.json_runtime": 32.4627,
"eval_code_feedback_multi_turn.json_samples_per_second": 67.801,
"eval_code_feedback_multi_turn.json_steps_per_second": 2.834,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_ultrainteract_sft.json_loss": 0.42869675159454346,
"eval_ultrainteract_sft.json_runtime": 8.6816,
"eval_ultrainteract_sft.json_samples_per_second": 167.711,
"eval_ultrainteract_sft.json_steps_per_second": 7.026,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_synthetic_text_to_sql.json_loss": 0.10359195619821548,
"eval_synthetic_text_to_sql.json_runtime": 0.1301,
"eval_synthetic_text_to_sql.json_samples_per_second": 261.368,
"eval_synthetic_text_to_sql.json_steps_per_second": 15.375,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_sft_react_sql_datas.json_loss": 0.6493918895721436,
"eval_sft_react_sql_datas.json_runtime": 7.8489,
"eval_sft_react_sql_datas.json_samples_per_second": 40.006,
"eval_sft_react_sql_datas.json_steps_per_second": 1.784,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_all_merge_code.json_loss": 0.29959577322006226,
"eval_all_merge_code.json_runtime": 0.3379,
"eval_all_merge_code.json_samples_per_second": 186.458,
"eval_all_merge_code.json_steps_per_second": 8.879,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_magpie_datas.json_loss": 0.4377444088459015,
"eval_magpie_datas.json_runtime": 2.2091,
"eval_magpie_datas.json_samples_per_second": 77.86,
"eval_magpie_datas.json_steps_per_second": 3.621,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_train_data_for_qwen.json_loss": 0.003975613508373499,
"eval_train_data_for_qwen.json_runtime": 0.2434,
"eval_train_data_for_qwen.json_samples_per_second": 41.087,
"eval_train_data_for_qwen.json_steps_per_second": 4.109,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_alpaca_cleaned.json_loss": 0.9270830750465393,
"eval_alpaca_cleaned.json_runtime": 0.1147,
"eval_alpaca_cleaned.json_samples_per_second": 235.404,
"eval_alpaca_cleaned.json_steps_per_second": 17.437,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_agent_instruct.json_loss": 0.2235051840543747,
"eval_agent_instruct.json_runtime": 0.5147,
"eval_agent_instruct.json_samples_per_second": 93.255,
"eval_agent_instruct.json_steps_per_second": 3.886,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_MathInstruct.json_loss": 0.20924758911132812,
"eval_MathInstruct.json_runtime": 0.3588,
"eval_MathInstruct.json_samples_per_second": 158.853,
"eval_MathInstruct.json_steps_per_second": 8.361,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_tested_143k_python_alpaca.json_loss": 0.44443246722221375,
"eval_tested_143k_python_alpaca.json_runtime": 0.3017,
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.684,
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.628,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_xlam_function_calling_60k.json_loss": 0.008116651326417923,
"eval_xlam_function_calling_60k.json_runtime": 0.1004,
"eval_xlam_function_calling_60k.json_samples_per_second": 229.157,
"eval_xlam_function_calling_60k.json_steps_per_second": 9.963,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_alpaca_data_gpt4_chinese.json_loss": 1.588812232017517,
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516,
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.032,
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.377,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_alpaca_gpt4_zh.json_loss": 0.9696416258811951,
"eval_alpaca_gpt4_zh.json_runtime": 0.0501,
"eval_alpaca_gpt4_zh.json_samples_per_second": 219.488,
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.953,
"step": 1000
},
{
"epoch": 0.2751883320147226,
"eval_codefeedback_filtered_instruction.json_loss": 0.5965829491615295,
"eval_codefeedback_filtered_instruction.json_runtime": 0.4872,
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.049,
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052,
"step": 1000
},
{
"epoch": 0.275738708678752,
"grad_norm": 0.3598334789276123,
"learning_rate": 1e-05,
"loss": 0.4624,
"step": 1002
},
{
"epoch": 0.27628908534278146,
"grad_norm": 0.3716166615486145,
"learning_rate": 1e-05,
"loss": 0.4593,
"step": 1004
},
{
"epoch": 0.2768394620068109,
"grad_norm": 0.3814164996147156,
"learning_rate": 1e-05,
"loss": 0.4581,
"step": 1006
},
{
"epoch": 0.2773898386708404,
"grad_norm": 0.3595026731491089,
"learning_rate": 1e-05,
"loss": 0.4826,
"step": 1008
},
{
"epoch": 0.2779402153348698,
"grad_norm": 0.3587126135826111,
"learning_rate": 1e-05,
"loss": 0.4563,
"step": 1010
},
{
"epoch": 0.27849059199889925,
"grad_norm": 0.36048388481140137,
"learning_rate": 1e-05,
"loss": 0.4646,
"step": 1012
},
{
"epoch": 0.2790409686629287,
"grad_norm": 0.37650784850120544,
"learning_rate": 1e-05,
"loss": 0.4658,
"step": 1014
},
{
"epoch": 0.2795913453269581,
"grad_norm": 0.34934109449386597,
"learning_rate": 1e-05,
"loss": 0.4535,
"step": 1016
},
{
"epoch": 0.2801417219909876,
"grad_norm": 0.375130295753479,
"learning_rate": 1e-05,
"loss": 0.4802,
"step": 1018
},
{
"epoch": 0.28069209865501704,
"grad_norm": 0.3595198094844818,
"learning_rate": 1e-05,
"loss": 0.4736,
"step": 1020
},
{
"epoch": 0.28124247531904645,
"grad_norm": 0.37816157937049866,
"learning_rate": 1e-05,
"loss": 0.4639,
"step": 1022
},
{
"epoch": 0.2817928519830759,
"grad_norm": 0.39598193764686584,
"learning_rate": 1e-05,
"loss": 0.4544,
"step": 1024
},
{
"epoch": 0.28234322864710537,
"grad_norm": 0.35407206416130066,
"learning_rate": 1e-05,
"loss": 0.4342,
"step": 1026
},
{
"epoch": 0.28289360531113483,
"grad_norm": 0.3630298972129822,
"learning_rate": 1e-05,
"loss": 0.48,
"step": 1028
},
{
"epoch": 0.28344398197516424,
"grad_norm": 0.35917675495147705,
"learning_rate": 1e-05,
"loss": 0.4647,
"step": 1030
},
{
"epoch": 0.2839943586391937,
"grad_norm": 0.36868980526924133,
"learning_rate": 1e-05,
"loss": 0.4633,
"step": 1032
},
{
"epoch": 0.28454473530322316,
"grad_norm": 0.38559168577194214,
"learning_rate": 1e-05,
"loss": 0.4786,
"step": 1034
},
{
"epoch": 0.28509511196725257,
"grad_norm": 0.3563440442085266,
"learning_rate": 1e-05,
"loss": 0.4703,
"step": 1036
},
{
"epoch": 0.28564548863128203,
"grad_norm": 0.3761630654335022,
"learning_rate": 1e-05,
"loss": 0.4712,
"step": 1038
},
{
"epoch": 0.2861958652953115,
"grad_norm": 0.3870238661766052,
"learning_rate": 1e-05,
"loss": 0.4622,
"step": 1040
},
{
"epoch": 0.2867462419593409,
"grad_norm": 0.36192306876182556,
"learning_rate": 1e-05,
"loss": 0.4619,
"step": 1042
},
{
"epoch": 0.28729661862337036,
"grad_norm": 0.3688748776912689,
"learning_rate": 1e-05,
"loss": 0.457,
"step": 1044
},
{
"epoch": 0.2878469952873998,
"grad_norm": 0.38211309909820557,
"learning_rate": 1e-05,
"loss": 0.4661,
"step": 1046
},
{
"epoch": 0.2883973719514293,
"grad_norm": 0.36421847343444824,
"learning_rate": 1e-05,
"loss": 0.4647,
"step": 1048
},
{
"epoch": 0.2889477486154587,
"grad_norm": 0.38917919993400574,
"learning_rate": 1e-05,
"loss": 0.4573,
"step": 1050
},
{
"epoch": 0.28949812527948815,
"grad_norm": 0.3668692111968994,
"learning_rate": 1e-05,
"loss": 0.4545,
"step": 1052
},
{
"epoch": 0.2900485019435176,
"grad_norm": 0.3869079649448395,
"learning_rate": 1e-05,
"loss": 0.4569,
"step": 1054
},
{
"epoch": 0.290598878607547,
"grad_norm": 0.3763209283351898,
"learning_rate": 1e-05,
"loss": 0.451,
"step": 1056
},
{
"epoch": 0.2911492552715765,
"grad_norm": 0.37899014353752136,
"learning_rate": 1e-05,
"loss": 0.4658,
"step": 1058
},
{
"epoch": 0.29169963193560594,
"grad_norm": 0.38784778118133545,
"learning_rate": 1e-05,
"loss": 0.4589,
"step": 1060
},
{
"epoch": 0.2922500085996354,
"grad_norm": 0.38340142369270325,
"learning_rate": 1e-05,
"loss": 0.4644,
"step": 1062
},
{
"epoch": 0.2928003852636648,
"grad_norm": 0.3758372962474823,
"learning_rate": 1e-05,
"loss": 0.4597,
"step": 1064
},
{
"epoch": 0.29335076192769427,
|
|
"grad_norm": 0.36990198493003845,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4577,
|
|
"step": 1066
|
|
},
|
|
{
|
|
"epoch": 0.29390113859172373,
|
|
"grad_norm": 0.35997095704078674,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.452,
|
|
"step": 1068
|
|
},
|
|
{
|
|
"epoch": 0.29445151525575314,
|
|
"grad_norm": 0.3728466331958771,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4567,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.2950018919197826,
|
|
"grad_norm": 0.3471437990665436,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4661,
|
|
"step": 1072
|
|
},
|
|
{
|
|
"epoch": 0.29555226858381206,
|
|
"grad_norm": 0.39197105169296265,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4738,
|
|
"step": 1074
|
|
},
|
|
{
|
|
"epoch": 0.29610264524784147,
|
|
"grad_norm": 0.366745263338089,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4555,
|
|
"step": 1076
|
|
},
|
|
{
|
|
"epoch": 0.29665302191187093,
|
|
"grad_norm": 0.3721451759338379,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4784,
|
|
"step": 1078
|
|
},
|
|
{
|
|
"epoch": 0.2972033985759004,
|
|
"grad_norm": 0.3505246341228485,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4486,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.29775377523992985,
|
|
"grad_norm": 0.37022680044174194,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4631,
|
|
"step": 1082
|
|
},
|
|
{
|
|
"epoch": 0.29830415190395926,
|
|
"grad_norm": 0.3808286190032959,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.472,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 0.2988545285679887,
|
|
"grad_norm": 0.3860435485839844,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4541,
|
|
"step": 1086
|
|
},
|
|
{
|
|
"epoch": 0.2994049052320182,
|
|
"grad_norm": 0.35552406311035156,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4565,
|
|
"step": 1088
|
|
},
|
|
{
|
|
"epoch": 0.2999552818960476,
|
|
"grad_norm": 0.3758242428302765,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4803,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.30050565856007705,
|
|
"grad_norm": 0.3900710940361023,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4658,
|
|
"step": 1092
|
|
},
|
|
{
|
|
"epoch": 0.3010560352241065,
|
|
"grad_norm": 0.38439512252807617,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4677,
|
|
"step": 1094
|
|
},
|
|
{
|
|
"epoch": 0.3016064118881359,
|
|
"grad_norm": 0.3970472812652588,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4751,
|
|
"step": 1096
|
|
},
|
|
{
|
|
"epoch": 0.3021567885521654,
|
|
"grad_norm": 0.36555778980255127,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4556,
|
|
"step": 1098
|
|
},
|
|
{
|
|
"epoch": 0.30270716521619484,
|
|
"grad_norm": 0.3682638108730316,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4504,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.3032575418802243,
|
|
"grad_norm": 0.4228995442390442,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4736,
|
|
"step": 1102
|
|
},
|
|
{
|
|
"epoch": 0.3038079185442537,
|
|
"grad_norm": 0.35070449113845825,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4589,
|
|
"step": 1104
|
|
},
|
|
{
|
|
"epoch": 0.30435829520828317,
|
|
"grad_norm": 0.40524446964263916,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4616,
|
|
"step": 1106
|
|
},
|
|
{
|
|
"epoch": 0.30490867187231263,
|
|
"grad_norm": 0.3461023271083832,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4679,
|
|
"step": 1108
|
|
},
|
|
{
|
|
"epoch": 0.30545904853634204,
|
|
"grad_norm": 0.3741723299026489,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4618,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.3060094252003715,
|
|
"grad_norm": 0.37440451979637146,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4638,
|
|
"step": 1112
|
|
},
|
|
{
|
|
"epoch": 0.30655980186440096,
|
|
"grad_norm": 0.34469377994537354,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4426,
|
|
"step": 1114
|
|
},
|
|
{
|
|
"epoch": 0.3071101785284304,
|
|
"grad_norm": 0.35499683022499084,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4548,
|
|
"step": 1116
|
|
},
|
|
{
|
|
"epoch": 0.30766055519245983,
|
|
"grad_norm": 0.3623688220977783,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4574,
|
|
"step": 1118
|
|
},
|
|
{
|
|
"epoch": 0.3082109318564893,
|
|
"grad_norm": 0.3487359583377838,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4632,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.30876130852051875,
|
|
"grad_norm": 0.36232292652130127,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.462,
|
|
"step": 1122
|
|
},
|
|
{
|
|
"epoch": 0.30931168518454816,
|
|
"grad_norm": 0.38301897048950195,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4545,
|
|
"step": 1124
|
|
},
|
|
{
|
|
"epoch": 0.3098620618485776,
|
|
"grad_norm": 0.3788921535015106,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4614,
|
|
"step": 1126
|
|
},
|
|
{
|
|
"epoch": 0.3104124385126071,
|
|
"grad_norm": 0.3723096251487732,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4658,
|
|
"step": 1128
|
|
},
|
|
{
|
|
"epoch": 0.3109628151766365,
|
|
"grad_norm": 0.3926720917224884,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4602,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.31151319184066595,
|
|
"grad_norm": 0.3565811514854431,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4692,
|
|
"step": 1132
|
|
},
|
|
{
|
|
"epoch": 0.3120635685046954,
|
|
"grad_norm": 0.38179391622543335,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4581,
|
|
"step": 1134
|
|
},
|
|
{
|
|
"epoch": 0.31261394516872487,
|
|
"grad_norm": 0.3732840418815613,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4628,
|
|
"step": 1136
|
|
},
|
|
{
|
|
"epoch": 0.3131643218327543,
|
|
"grad_norm": 0.3934018313884735,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4634,
|
|
"step": 1138
|
|
},
|
|
{
|
|
"epoch": 0.31371469849678374,
|
|
"grad_norm": 0.3575834035873413,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4507,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.3142650751608132,
|
|
"grad_norm": 0.3623636066913605,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4547,
|
|
"step": 1142
|
|
},
|
|
{
|
|
"epoch": 0.3148154518248426,
|
|
"grad_norm": 0.3794458508491516,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4661,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 0.31536582848887207,
|
|
"grad_norm": 0.3896718919277191,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4646,
|
|
"step": 1146
|
|
},
|
|
{
|
|
"epoch": 0.31591620515290153,
|
|
"grad_norm": 0.3608621060848236,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4522,
|
|
"step": 1148
|
|
},
|
|
{
|
|
"epoch": 0.31646658181693094,
|
|
"grad_norm": 0.37019404768943787,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4548,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.3170169584809604,
|
|
"grad_norm": 0.37957248091697693,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4554,
|
|
"step": 1152
|
|
},
|
|
{
|
|
"epoch": 0.31756733514498986,
|
|
"grad_norm": 0.3605276048183441,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4679,
|
|
"step": 1154
|
|
},
|
|
{
|
|
"epoch": 0.3181177118090193,
|
|
"grad_norm": 0.37218716740608215,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4686,
|
|
"step": 1156
|
|
},
|
|
{
|
|
"epoch": 0.3186680884730487,
|
|
"grad_norm": 0.37037035822868347,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4898,
|
|
"step": 1158
|
|
},
|
|
{
|
|
"epoch": 0.3192184651370782,
|
|
"grad_norm": 0.3569047749042511,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4619,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.31976884180110765,
|
|
"grad_norm": 0.3728378117084503,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4544,
|
|
"step": 1162
|
|
},
|
|
{
|
|
"epoch": 0.32031921846513706,
|
|
"grad_norm": 0.35970696806907654,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4704,
|
|
"step": 1164
|
|
},
|
|
{
|
|
"epoch": 0.3208695951291665,
|
|
"grad_norm": 0.36476969718933105,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4605,
|
|
"step": 1166
|
|
},
|
|
{
|
|
"epoch": 0.321419971793196,
|
|
"grad_norm": 0.35015928745269775,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4653,
|
|
"step": 1168
|
|
},
|
|
{
|
|
"epoch": 0.3219703484572254,
|
|
"grad_norm": 0.3600417375564575,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4557,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.32252072512125485,
|
|
"grad_norm": 0.36994755268096924,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4601,
|
|
"step": 1172
|
|
},
|
|
{
|
|
"epoch": 0.3230711017852843,
|
|
"grad_norm": 0.39908286929130554,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.472,
|
|
"step": 1174
|
|
},
|
|
{
|
|
"epoch": 0.32362147844931377,
|
|
"grad_norm": 0.3717789947986603,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4646,
|
|
"step": 1176
|
|
},
|
|
{
|
|
"epoch": 0.3241718551133432,
|
|
"grad_norm": 0.3617453873157501,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4606,
|
|
"step": 1178
|
|
},
|
|
{
|
|
"epoch": 0.32472223177737264,
|
|
"grad_norm": 0.35809728503227234,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4548,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.3252726084414021,
|
|
"grad_norm": 0.3767383396625519,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4785,
|
|
"step": 1182
|
|
},
|
|
{
|
|
"epoch": 0.3258229851054315,
|
|
"grad_norm": 0.3819461166858673,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4695,
|
|
"step": 1184
|
|
},
|
|
{
|
|
"epoch": 0.32637336176946097,
|
|
"grad_norm": 0.3590524196624756,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.468,
|
|
"step": 1186
|
|
},
|
|
{
|
|
"epoch": 0.32692373843349043,
|
|
"grad_norm": 0.37356823682785034,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4628,
|
|
"step": 1188
|
|
},
|
|
{
|
|
"epoch": 0.3274741150975199,
|
|
"grad_norm": 0.39389410614967346,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4686,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.3280244917615493,
|
|
"grad_norm": 0.36901354789733887,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4623,
|
|
"step": 1192
|
|
},
|
|
{
|
|
"epoch": 0.32857486842557876,
|
|
"grad_norm": 0.35733821988105774,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.457,
|
|
"step": 1194
|
|
},
|
|
{
|
|
"epoch": 0.3291252450896082,
|
|
"grad_norm": 0.3803520202636719,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4661,
|
|
"step": 1196
|
|
},
|
|
{
|
|
"epoch": 0.3296756217536376,
|
|
"grad_norm": 0.36812326312065125,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.453,
|
|
"step": 1198
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"grad_norm": 0.37463024258613586,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4611,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_merge_loss": 0.4038620591163635,
|
|
"eval_merge_runtime": 600.528,
|
|
"eval_merge_samples_per_second": 56.204,
|
|
"eval_merge_steps_per_second": 2.343,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5311903953552246,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.3899,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 73.822,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.08,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7848892211914062,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.7548,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.605,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.456,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_Table_GPT.json_loss": 0.07294219732284546,
|
|
"eval_Table_GPT.json_runtime": 25.0251,
|
|
"eval_Table_GPT.json_samples_per_second": 83.636,
|
|
"eval_Table_GPT.json_steps_per_second": 3.516,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_gpt_4o_200k.json_loss": 0.8128483295440674,
|
|
"eval_gpt_4o_200k.json_runtime": 48.5727,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.311,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.394,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_multi_turn_datas.json_loss": 0.343874990940094,
|
|
"eval_multi_turn_datas.json_runtime": 75.779,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.811,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.204,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_table_python_code_datas.json_loss": 0.2791996896266937,
|
|
"eval_table_python_code_datas.json_runtime": 43.1703,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.011,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.085,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_tabular_llm_data.json_loss": 0.11510641872882843,
|
|
"eval_tabular_llm_data.json_runtime": 8.5754,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.687,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.283,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_python_code_critic_21k.json_loss": 0.5806341171264648,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2355,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.517,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.727,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_all_merge_table_dataset.json_loss": 0.0781954750418663,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.3576,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.483,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.284,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5880293846130371,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.5337,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.653,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.828,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_ultrainteract_sft.json_loss": 0.42568570375442505,
|
|
"eval_ultrainteract_sft.json_runtime": 8.665,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 168.033,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.04,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.10025755316019058,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.127,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 267.683,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.746,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6435717344284058,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8854,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.82,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.775,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_all_merge_code.json_loss": 0.29655295610427856,
|
|
"eval_all_merge_code.json_runtime": 0.3333,
|
|
"eval_all_merge_code.json_samples_per_second": 189.039,
|
|
"eval_all_merge_code.json_steps_per_second": 9.002,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_magpie_datas.json_loss": 0.4353857934474945,
|
|
"eval_magpie_datas.json_runtime": 2.22,
|
|
"eval_magpie_datas.json_samples_per_second": 77.478,
|
|
"eval_magpie_datas.json_steps_per_second": 3.604,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_train_data_for_qwen.json_loss": 0.0036680654156953096,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2448,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.856,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.086,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_alpaca_cleaned.json_loss": 0.9278478622436523,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1139,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 237.139,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.566,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_agent_instruct.json_loss": 0.22283704578876495,
|
|
"eval_agent_instruct.json_runtime": 0.5129,
|
|
"eval_agent_instruct.json_samples_per_second": 93.582,
|
|
"eval_agent_instruct.json_steps_per_second": 3.899,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_MathInstruct.json_loss": 0.20810073614120483,
|
|
"eval_MathInstruct.json_runtime": 0.3587,
|
|
"eval_MathInstruct.json_samples_per_second": 158.905,
|
|
"eval_MathInstruct.json_steps_per_second": 8.363,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.44691047072410583,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3024,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.419,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.613,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.009029570966959,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1005,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 228.948,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.954,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5715256929397583,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0514,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 311.088,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.443,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9568694233894348,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0501,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 219.517,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.956,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.3302259984176671,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5982481837272644,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.487,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.068,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.053,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.33077637508169655,
|
|
"grad_norm": 0.3862474262714386,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.467,
|
|
"step": 1202
|
|
},
|
|
{
|
|
"epoch": 0.33132675174572596,
|
|
"grad_norm": 0.3586987555027008,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4586,
|
|
"step": 1204
|
|
},
|
|
{
|
|
"epoch": 0.3318771284097554,
|
|
"grad_norm": 0.36768838763237,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4658,
|
|
"step": 1206
|
|
},
|
|
{
|
|
"epoch": 0.3324275050737849,
|
|
"grad_norm": 0.36789608001708984,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4479,
|
|
"step": 1208
|
|
},
|
|
{
|
|
"epoch": 0.33297788173781434,
|
|
"grad_norm": 0.3875747323036194,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4651,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.33352825840184375,
|
|
"grad_norm": 0.37122058868408203,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4474,
|
|
"step": 1212
|
|
},
|
|
{
|
|
"epoch": 0.3340786350658732,
|
|
"grad_norm": 0.3785482347011566,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4573,
|
|
"step": 1214
|
|
},
|
|
{
|
|
"epoch": 0.33462901172990267,
|
|
"grad_norm": 0.3795594871044159,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4633,
|
|
"step": 1216
|
|
},
|
|
{
|
|
"epoch": 0.3351793883939321,
|
|
"grad_norm": 0.35303714871406555,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4701,
|
|
"step": 1218
|
|
},
|
|
{
|
|
"epoch": 0.33572976505796154,
|
|
"grad_norm": 0.3473946154117584,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4565,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.336280141721991,
|
|
"grad_norm": 0.36495375633239746,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4528,
|
|
"step": 1222
|
|
},
|
|
{
|
|
"epoch": 0.3368305183860204,
|
|
"grad_norm": 0.3617894649505615,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4756,
|
|
"step": 1224
|
|
},
|
|
{
|
|
"epoch": 0.33738089505004987,
|
|
"grad_norm": 0.36371487379074097,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4606,
|
|
"step": 1226
|
|
},
|
|
{
|
|
"epoch": 0.33793127171407933,
|
|
"grad_norm": 0.39192309975624084,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 1228
|
|
},
|
|
{
|
|
"epoch": 0.3384816483781088,
|
|
"grad_norm": 0.3902663588523865,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4699,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.3390320250421382,
|
|
"grad_norm": 0.3662269115447998,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4627,
|
|
"step": 1232
|
|
},
|
|
{
|
|
"epoch": 0.33958240170616766,
|
|
"grad_norm": 0.3659150004386902,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4663,
|
|
"step": 1234
|
|
},
|
|
{
|
|
"epoch": 0.3401327783701971,
|
|
"grad_norm": 0.3632274568080902,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4499,
|
|
"step": 1236
|
|
},
|
|
{
|
|
"epoch": 0.3406831550342265,
|
|
"grad_norm": 0.38413625955581665,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4516,
|
|
"step": 1238
|
|
},
|
|
{
|
|
"epoch": 0.341233531698256,
|
|
"grad_norm": 0.35747644305229187,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4718,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.34178390836228545,
|
|
"grad_norm": 0.36938604712486267,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4568,
|
|
"step": 1242
|
|
},
|
|
{
|
|
"epoch": 0.3423342850263149,
|
|
"grad_norm": 0.38448217511177063,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.474,
|
|
"step": 1244
|
|
},
|
|
{
|
|
"epoch": 0.3428846616903443,
|
|
"grad_norm": 0.3694998323917389,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4516,
|
|
"step": 1246
|
|
},
|
|
{
|
|
"epoch": 0.3434350383543738,
|
|
"grad_norm": 0.41237321496009827,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4569,
|
|
"step": 1248
|
|
},
|
|
{
|
|
"epoch": 0.34398541501840324,
|
|
"grad_norm": 0.4058983325958252,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4657,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.34453579168243265,
|
|
"grad_norm": 0.3610474467277527,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4587,
|
|
"step": 1252
|
|
},
|
|
{
|
|
"epoch": 0.3450861683464621,
|
|
"grad_norm": 0.3664454221725464,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4656,
|
|
"step": 1254
|
|
},
|
|
{
|
|
"epoch": 0.34563654501049157,
|
|
"grad_norm": 0.35148540139198303,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4471,
|
|
"step": 1256
|
|
},
|
|
{
|
|
"epoch": 0.346186921674521,
|
|
"grad_norm": 0.35331565141677856,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4674,
|
|
"step": 1258
|
|
},
|
|
{
|
|
"epoch": 0.34673729833855044,
|
|
"grad_norm": 0.35367992520332336,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4572,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.3472876750025799,
|
|
"grad_norm": 0.36106035113334656,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.466,
|
|
"step": 1262
|
|
},
|
|
{
|
|
"epoch": 0.34783805166660936,
|
|
"grad_norm": 0.36034414172172546,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4412,
|
|
"step": 1264
|
|
},
|
|
{
|
|
"epoch": 0.34838842833063877,
|
|
"grad_norm": 0.3532898426055908,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4573,
|
|
"step": 1266
|
|
},
|
|
{
|
|
"epoch": 0.34893880499466823,
|
|
"grad_norm": 0.35383620858192444,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4644,
|
|
"step": 1268
|
|
},
|
|
{
|
|
"epoch": 0.3494891816586977,
|
|
"grad_norm": 0.3757399022579193,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4548,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.3500395583227271,
|
|
"grad_norm": 0.35997340083122253,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4664,
|
|
"step": 1272
|
|
},
|
|
{
|
|
"epoch": 0.35058993498675656,
|
|
"grad_norm": 0.3761090636253357,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4601,
|
|
"step": 1274
|
|
},
|
|
{
|
|
"epoch": 0.351140311650786,
|
|
"grad_norm": 0.33666959404945374,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4596,
|
|
"step": 1276
|
|
},
|
|
{
|
|
"epoch": 0.3516906883148154,
|
|
"grad_norm": 0.36252304911613464,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4688,
|
|
"step": 1278
|
|
},
|
|
{
|
|
"epoch": 0.3522410649788449,
|
|
"grad_norm": 0.3987884819507599,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4444,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.35279144164287435,
|
|
"grad_norm": 0.35914021730422974,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4508,
|
|
"step": 1282
|
|
},
|
|
{
|
|
"epoch": 0.3533418183069038,
|
|
"grad_norm": 0.36508429050445557,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4597,
|
|
"step": 1284
|
|
},
|
|
{
|
|
"epoch": 0.3538921949709332,
|
|
"grad_norm": 0.3923473060131073,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4594,
|
|
"step": 1286
|
|
},
|
|
{
|
|
"epoch": 0.3544425716349627,
|
|
"grad_norm": 0.38775792717933655,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4573,
|
|
"step": 1288
|
|
},
|
|
{
|
|
"epoch": 0.35499294829899214,
|
|
"grad_norm": 0.4628289043903351,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4732,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.35554332496302155,
|
|
"grad_norm": 0.35442307591438293,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4621,
|
|
"step": 1292
|
|
},
|
|
{
|
|
"epoch": 0.356093701627051,
|
|
"grad_norm": 0.3809347152709961,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4696,
|
|
"step": 1294
|
|
},
|
|
{
|
|
"epoch": 0.35664407829108047,
|
|
"grad_norm": 0.3683224618434906,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4649,
|
|
"step": 1296
|
|
},
|
|
{
|
|
"epoch": 0.35719445495510993,
|
|
"grad_norm": 0.3792459964752197,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.465,
|
|
"step": 1298
|
|
},
|
|
{
|
|
"epoch": 0.35774483161913934,
|
|
"grad_norm": 0.3704141080379486,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4572,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.3582952082831688,
|
|
"grad_norm": 0.3618161678314209,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4497,
|
|
"step": 1302
|
|
},
|
|
{
|
|
"epoch": 0.35884558494719826,
|
|
"grad_norm": 0.36538904905319214,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4525,
|
|
"step": 1304
|
|
},
|
|
{
|
|
"epoch": 0.35939596161122767,
|
|
"grad_norm": 0.36815035343170166,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4767,
|
|
"step": 1306
|
|
},
|
|
{
|
|
"epoch": 0.3599463382752571,
|
|
"grad_norm": 0.39006996154785156,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4809,
|
|
"step": 1308
|
|
},
|
|
{
|
|
"epoch": 0.3604967149392866,
|
|
"grad_norm": 0.3829619288444519,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4714,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.361047091603316,
|
|
"grad_norm": 0.37935730814933777,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4518,
|
|
"step": 1312
|
|
},
|
|
{
|
|
"epoch": 0.36159746826734546,
|
|
"grad_norm": 0.371320903301239,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4437,
|
|
"step": 1314
|
|
},
|
|
{
|
|
"epoch": 0.3621478449313749,
|
|
"grad_norm": 0.35784757137298584,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4579,
|
|
"step": 1316
|
|
},
|
|
{
|
|
"epoch": 0.3626982215954044,
|
|
"grad_norm": 0.36308974027633667,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4561,
|
|
"step": 1318
|
|
},
|
|
{
|
|
"epoch": 0.3632485982594338,
|
|
"grad_norm": 0.3538898825645447,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4574,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.36379897492346325,
|
|
"grad_norm": 0.3715920150279999,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4649,
|
|
"step": 1322
|
|
},
|
|
{
|
|
"epoch": 0.3643493515874927,
|
|
"grad_norm": 0.3698347806930542,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4648,
|
|
"step": 1324
|
|
},
|
|
{
|
|
"epoch": 0.3648997282515221,
|
|
"grad_norm": 0.3725499212741852,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4669,
|
|
"step": 1326
|
|
},
|
|
{
|
|
"epoch": 0.3654501049155516,
|
|
"grad_norm": 0.37399542331695557,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4615,
|
|
"step": 1328
|
|
},
|
|
{
|
|
"epoch": 0.36600048157958104,
|
|
"grad_norm": 0.35364219546318054,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4573,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.36655085824361044,
|
|
"grad_norm": 0.3651660084724426,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4485,
|
|
"step": 1332
|
|
},
|
|
{
|
|
"epoch": 0.3671012349076399,
|
|
"grad_norm": 0.3659324645996094,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4492,
|
|
"step": 1334
|
|
},
|
|
{
|
|
"epoch": 0.36765161157166937,
|
|
"grad_norm": 0.35941600799560547,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4727,
|
|
"step": 1336
|
|
},
|
|
{
|
|
"epoch": 0.36820198823569883,
|
|
"grad_norm": 0.35083696246147156,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4453,
|
|
"step": 1338
|
|
},
|
|
{
|
|
"epoch": 0.36875236489972824,
|
|
"grad_norm": 0.3690749406814575,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4582,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.3693027415637577,
|
|
"grad_norm": 0.3743647038936615,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4383,
|
|
"step": 1342
|
|
},
|
|
{
|
|
"epoch": 0.36985311822778716,
|
|
"grad_norm": 0.37354332208633423,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.466,
|
|
"step": 1344
|
|
},
|
|
{
|
|
"epoch": 0.37040349489181656,
|
|
"grad_norm": 0.3735334575176239,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4535,
|
|
"step": 1346
|
|
},
|
|
{
|
|
"epoch": 0.370953871555846,
|
|
"grad_norm": 0.37339311838150024,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.454,
|
|
"step": 1348
|
|
},
|
|
{
|
|
"epoch": 0.3715042482198755,
|
|
"grad_norm": 0.35196128487586975,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4685,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.37205462488390495,
|
|
"grad_norm": 0.4031345546245575,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4689,
|
|
"step": 1352
|
|
},
|
|
{
|
|
"epoch": 0.37260500154793436,
|
|
"grad_norm": 0.363320529460907,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.459,
|
|
"step": 1354
|
|
},
|
|
{
|
|
"epoch": 0.3731553782119638,
|
|
"grad_norm": 0.36146363615989685,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4446,
|
|
"step": 1356
|
|
},
|
|
{
|
|
"epoch": 0.3737057548759933,
|
|
"grad_norm": 0.36425283551216125,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.468,
|
|
"step": 1358
|
|
},
|
|
{
|
|
"epoch": 0.3742561315400227,
|
|
"grad_norm": 0.3795093894004822,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4513,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.37480650820405215,
|
|
"grad_norm": 0.37901571393013,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.464,
|
|
"step": 1362
|
|
},
|
|
{
|
|
"epoch": 0.3753568848680816,
|
|
"grad_norm": 0.3682788014411926,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4535,
|
|
"step": 1364
|
|
},
|
|
{
|
|
"epoch": 0.375907261532111,
|
|
"grad_norm": 0.38756048679351807,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4421,
|
|
"step": 1366
|
|
},
|
|
{
|
|
"epoch": 0.3764576381961405,
|
|
"grad_norm": 0.3859202563762665,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4601,
|
|
"step": 1368
|
|
},
|
|
{
|
|
"epoch": 0.37700801486016994,
|
|
"grad_norm": 0.3959304392337799,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4427,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.3775583915241994,
|
|
"grad_norm": 0.3768652379512787,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4483,
|
|
"step": 1372
|
|
},
|
|
{
|
|
"epoch": 0.3781087681882288,
|
|
"grad_norm": 0.37339305877685547,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4605,
|
|
"step": 1374
|
|
},
|
|
{
|
|
"epoch": 0.37865914485225827,
|
|
"grad_norm": 0.4036271572113037,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4546,
|
|
"step": 1376
|
|
},
|
|
{
|
|
"epoch": 0.37920952151628773,
|
|
"grad_norm": 0.35173818469047546,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4675,
|
|
"step": 1378
|
|
},
|
|
{
|
|
"epoch": 0.37975989818031713,
|
|
"grad_norm": 0.3682287335395813,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4676,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.3803102748443466,
|
|
"grad_norm": 0.37660422921180725,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4483,
|
|
"step": 1382
|
|
},
|
|
{
|
|
"epoch": 0.38086065150837606,
|
|
"grad_norm": 0.37428486347198486,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4537,
|
|
"step": 1384
|
|
},
|
|
{
|
|
"epoch": 0.38141102817240546,
|
|
"grad_norm": 0.36140507459640503,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4542,
|
|
"step": 1386
|
|
},
|
|
{
|
|
"epoch": 0.3819614048364349,
|
|
"grad_norm": 0.3818880021572113,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4546,
|
|
"step": 1388
|
|
},
|
|
{
|
|
"epoch": 0.3825117815004644,
|
|
"grad_norm": 0.3840683698654175,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4419,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.38306215816449385,
|
|
"grad_norm": 0.36933979392051697,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4632,
|
|
"step": 1392
|
|
},
|
|
{
|
|
"epoch": 0.38361253482852326,
|
|
"grad_norm": 0.3724002540111542,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.455,
|
|
"step": 1394
|
|
},
|
|
{
|
|
"epoch": 0.3841629114925527,
|
|
"grad_norm": 0.35783514380455017,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4652,
|
|
"step": 1396
|
|
},
|
|
{
|
|
"epoch": 0.3847132881565822,
|
|
"grad_norm": 0.36758366227149963,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4647,
|
|
"step": 1398
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"grad_norm": 0.3690735995769501,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4572,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_merge_loss": 0.39922505617141724,
|
|
"eval_merge_runtime": 600.7214,
|
|
"eval_merge_samples_per_second": 56.186,
|
|
"eval_merge_steps_per_second": 2.342,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5239847898483276,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.3569,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 74.057,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.09,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7791606187820435,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.6746,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.754,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.463,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_Table_GPT.json_loss": 0.0626993179321289,
|
|
"eval_Table_GPT.json_runtime": 24.9542,
|
|
"eval_Table_GPT.json_samples_per_second": 83.874,
|
|
"eval_Table_GPT.json_steps_per_second": 3.526,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_gpt_4o_200k.json_loss": 0.8082922101020813,
|
|
"eval_gpt_4o_200k.json_runtime": 48.466,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.596,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.406,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_multi_turn_datas.json_loss": 0.3381649851799011,
|
|
"eval_multi_turn_datas.json_runtime": 75.5711,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.957,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.21,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_table_python_code_datas.json_loss": 0.2752579152584076,
|
|
"eval_table_python_code_datas.json_runtime": 43.0439,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.158,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.091,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_tabular_llm_data.json_loss": 0.11023548245429993,
|
|
"eval_tabular_llm_data.json_runtime": 8.5291,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.843,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.29,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_python_code_critic_21k.json_loss": 0.5756029486656189,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2275,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.973,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.746,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_all_merge_table_dataset.json_loss": 0.08007320761680603,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.3,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.558,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.288,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5849318504333496,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.4131,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.905,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.838,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_ultrainteract_sft.json_loss": 0.4235917031764984,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6815,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 167.713,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.026,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.10058007389307022,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1256,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 270.794,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.929,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_sft_react_sql_datas.json_loss": 0.63919597864151,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8177,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 40.165,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.791,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_all_merge_code.json_loss": 0.293491929769516,
|
|
"eval_all_merge_code.json_runtime": 0.3331,
|
|
"eval_all_merge_code.json_samples_per_second": 189.11,
|
|
"eval_all_merge_code.json_steps_per_second": 9.005,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_magpie_datas.json_loss": 0.43307721614837646,
|
|
"eval_magpie_datas.json_runtime": 2.214,
|
|
"eval_magpie_datas.json_samples_per_second": 77.687,
|
|
"eval_magpie_datas.json_steps_per_second": 3.613,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_train_data_for_qwen.json_loss": 0.004504092503339052,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2448,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.845,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.084,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_alpaca_cleaned.json_loss": 0.9073267579078674,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1148,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 235.221,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.424,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_agent_instruct.json_loss": 0.22197985649108887,
|
|
"eval_agent_instruct.json_runtime": 0.5129,
|
|
"eval_agent_instruct.json_samples_per_second": 93.586,
|
|
"eval_agent_instruct.json_steps_per_second": 3.899,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_MathInstruct.json_loss": 0.2014550119638443,
|
|
"eval_MathInstruct.json_runtime": 0.3655,
|
|
"eval_MathInstruct.json_samples_per_second": 155.953,
|
|
"eval_MathInstruct.json_steps_per_second": 8.208,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.44645121693611145,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3016,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.74,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.632,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.009633864276111126,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.0999,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 230.172,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 10.007,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5636402368545532,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0503,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 318.002,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.875,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9602435231208801,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0502,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 219.067,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.915,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.3852636648206116,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5993592143058777,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4852,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.223,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.061,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.38581404148464105,
|
|
"grad_norm": 0.36705121397972107,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4617,
|
|
"step": 1402
|
|
},
|
|
{
|
|
"epoch": 0.3863644181486705,
|
|
"grad_norm": 0.3653152883052826,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4528,
|
|
"step": 1404
|
|
},
|
|
{
|
|
"epoch": 0.38691479481269997,
|
|
"grad_norm": 0.34426313638687134,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4464,
|
|
"step": 1406
|
|
},
|
|
{
|
|
"epoch": 0.3874651714767294,
|
|
"grad_norm": 0.3493911623954773,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4638,
|
|
"step": 1408
|
|
},
|
|
{
|
|
"epoch": 0.38801554814075884,
|
|
"grad_norm": 0.3841487765312195,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4471,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.3885659248047883,
|
|
"grad_norm": 0.3770912289619446,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4623,
|
|
"step": 1412
|
|
},
|
|
{
|
|
"epoch": 0.3891163014688177,
|
|
"grad_norm": 0.38141822814941406,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4583,
|
|
"step": 1414
|
|
},
|
|
{
|
|
"epoch": 0.38966667813284717,
|
|
"grad_norm": 0.3774464726448059,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4574,
|
|
"step": 1416
|
|
},
|
|
{
|
|
"epoch": 0.39021705479687663,
|
|
"grad_norm": 0.35681846737861633,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4443,
|
|
"step": 1418
|
|
},
|
|
{
|
|
"epoch": 0.39076743146090603,
|
|
"grad_norm": 0.3700469732284546,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4468,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.3913178081249355,
|
|
"grad_norm": 0.35229384899139404,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.456,
|
|
"step": 1422
|
|
},
|
|
{
|
|
"epoch": 0.39186818478896496,
|
|
"grad_norm": 0.3469116687774658,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.451,
|
|
"step": 1424
|
|
},
|
|
{
|
|
"epoch": 0.3924185614529944,
|
|
"grad_norm": 0.36313918232917786,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4679,
|
|
"step": 1426
|
|
},
|
|
{
|
|
"epoch": 0.3929689381170238,
|
|
"grad_norm": 0.3543436527252197,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.464,
|
|
"step": 1428
|
|
},
|
|
{
|
|
"epoch": 0.3935193147810533,
|
|
"grad_norm": 0.3992765545845032,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.486,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.39406969144508275,
|
|
"grad_norm": 0.36149340867996216,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4426,
|
|
"step": 1432
|
|
},
|
|
{
|
|
"epoch": 0.39462006810911215,
|
|
"grad_norm": 0.37118762731552124,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4531,
|
|
"step": 1434
|
|
},
|
|
{
|
|
"epoch": 0.3951704447731416,
|
|
"grad_norm": 0.3618330955505371,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4621,
|
|
"step": 1436
|
|
},
|
|
{
|
|
"epoch": 0.3957208214371711,
|
|
"grad_norm": 0.37272128462791443,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4616,
|
|
"step": 1438
|
|
},
|
|
{
|
|
"epoch": 0.3962711981012005,
|
|
"grad_norm": 0.3678719997406006,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4477,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.39682157476522995,
|
|
"grad_norm": 0.342907190322876,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4484,
|
|
"step": 1442
|
|
},
|
|
{
|
|
"epoch": 0.3973719514292594,
|
|
"grad_norm": 0.3722037374973297,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4576,
|
|
"step": 1444
|
|
},
|
|
{
|
|
"epoch": 0.39792232809328887,
|
|
"grad_norm": 0.3829335868358612,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4568,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 0.3984727047573183,
|
|
"grad_norm": 0.36857596039772034,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4509,
|
|
"step": 1448
|
|
},
|
|
{
|
|
"epoch": 0.39902308142134774,
|
|
"grad_norm": 0.36784934997558594,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.46,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.3995734580853772,
|
|
"grad_norm": 0.36996331810951233,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 1452
|
|
},
|
|
{
|
|
"epoch": 0.4001238347494066,
|
|
"grad_norm": 0.3608056604862213,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4467,
|
|
"step": 1454
|
|
},
|
|
{
|
|
"epoch": 0.40067421141343607,
|
|
"grad_norm": 0.3827229142189026,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4576,
|
|
"step": 1456
|
|
},
|
|
{
|
|
"epoch": 0.4012245880774655,
|
|
"grad_norm": 0.38073116540908813,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4433,
|
|
"step": 1458
|
|
},
|
|
{
|
|
"epoch": 0.40177496474149493,
|
|
"grad_norm": 0.3861468434333801,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4466,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.4023253414055244,
|
|
"grad_norm": 0.36093631386756897,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4409,
|
|
"step": 1462
|
|
},
|
|
{
|
|
"epoch": 0.40287571806955386,
|
|
"grad_norm": 0.34549927711486816,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4507,
|
|
"step": 1464
|
|
},
|
|
{
|
|
"epoch": 0.4034260947335833,
|
|
"grad_norm": 0.3782083988189697,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4648,
|
|
"step": 1466
|
|
},
|
|
{
|
|
"epoch": 0.4039764713976127,
|
|
"grad_norm": 0.366914302110672,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.462,
|
|
"step": 1468
|
|
},
|
|
{
|
|
"epoch": 0.4045268480616422,
|
|
"grad_norm": 0.3604414761066437,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4639,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.40507722472567165,
|
|
"grad_norm": 0.3806079924106598,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.452,
|
|
"step": 1472
|
|
},
|
|
{
|
|
"epoch": 0.40562760138970105,
|
|
"grad_norm": 0.36079150438308716,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4534,
|
|
"step": 1474
|
|
},
|
|
{
|
|
"epoch": 0.4061779780537305,
|
|
"grad_norm": 0.3526926040649414,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4483,
|
|
"step": 1476
|
|
},
|
|
{
|
|
"epoch": 0.40672835471776,
|
|
"grad_norm": 0.36440181732177734,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4445,
|
|
"step": 1478
|
|
},
|
|
{
|
|
"epoch": 0.40727873138178944,
|
|
"grad_norm": 0.3452344238758087,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4531,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.40782910804581884,
|
|
"grad_norm": 0.3774935007095337,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4644,
|
|
"step": 1482
|
|
},
|
|
{
|
|
"epoch": 0.4083794847098483,
|
|
"grad_norm": 0.3485760986804962,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 1484
|
|
},
|
|
{
|
|
"epoch": 0.40892986137387777,
|
|
"grad_norm": 0.3787960708141327,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4682,
|
|
"step": 1486
|
|
},
|
|
{
|
|
"epoch": 0.4094802380379072,
|
|
"grad_norm": 0.38031846284866333,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.462,
|
|
"step": 1488
|
|
},
|
|
{
|
|
"epoch": 0.41003061470193664,
|
|
"grad_norm": 0.3756881654262543,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4514,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.4105809913659661,
|
|
"grad_norm": 0.3663581311702728,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4482,
|
|
"step": 1492
|
|
},
|
|
{
|
|
"epoch": 0.4111313680299955,
|
|
"grad_norm": 0.35938966274261475,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4471,
|
|
"step": 1494
|
|
},
|
|
{
|
|
"epoch": 0.41168174469402496,
|
|
"grad_norm": 0.3561854064464569,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4514,
|
|
"step": 1496
|
|
},
|
|
{
|
|
"epoch": 0.4122321213580544,
|
|
"grad_norm": 0.36052775382995605,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4564,
|
|
"step": 1498
|
|
},
|
|
{
|
|
"epoch": 0.4127824980220839,
|
|
"grad_norm": 0.3753555119037628,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4543,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.4133328746861133,
|
|
"grad_norm": 0.3747691810131073,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4588,
|
|
"step": 1502
|
|
},
|
|
{
|
|
"epoch": 0.41388325135014276,
|
|
"grad_norm": 0.3654341399669647,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.451,
|
|
"step": 1504
|
|
},
|
|
{
|
|
"epoch": 0.4144336280141722,
|
|
"grad_norm": 0.3624642491340637,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4528,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 0.4149840046782016,
|
|
"grad_norm": 0.3465966284275055,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.45,
|
|
"step": 1508
|
|
},
|
|
{
|
|
"epoch": 0.4155343813422311,
|
|
"grad_norm": 0.38202422857284546,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4459,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.41608475800626055,
|
|
"grad_norm": 0.3562781512737274,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4375,
|
|
"step": 1512
|
|
},
|
|
{
|
|
"epoch": 0.41663513467028995,
|
|
"grad_norm": 0.36660805344581604,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4511,
|
|
"step": 1514
|
|
},
|
|
{
|
|
"epoch": 0.4171855113343194,
|
|
"grad_norm": 0.36541464924812317,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4618,
|
|
"step": 1516
|
|
},
|
|
{
|
|
"epoch": 0.4177358879983489,
|
|
"grad_norm": 0.3570851981639862,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4568,
|
|
"step": 1518
|
|
},
|
|
{
|
|
"epoch": 0.41828626466237834,
|
|
"grad_norm": 0.3508870005607605,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4492,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.41883664132640774,
|
|
"grad_norm": 0.35050973296165466,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4481,
|
|
"step": 1522
|
|
},
|
|
{
|
|
"epoch": 0.4193870179904372,
|
|
"grad_norm": 0.3564668297767639,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4461,
|
|
"step": 1524
|
|
},
|
|
{
|
|
"epoch": 0.41993739465446667,
|
|
"grad_norm": 0.3646043539047241,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4554,
|
|
"step": 1526
|
|
},
|
|
{
|
|
"epoch": 0.4204877713184961,
|
|
"grad_norm": 0.3904356360435486,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4731,
|
|
"step": 1528
|
|
},
|
|
{
|
|
"epoch": 0.42103814798252553,
|
|
"grad_norm": 0.37373483180999756,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4679,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.421588524646555,
|
|
"grad_norm": 0.3704439699649811,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4706,
|
|
"step": 1532
|
|
},
|
|
{
|
|
"epoch": 0.42213890131058446,
|
|
"grad_norm": 0.37894484400749207,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4515,
|
|
"step": 1534
|
|
},
|
|
{
|
|
"epoch": 0.42268927797461386,
|
|
"grad_norm": 0.3871210217475891,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4477,
|
|
"step": 1536
|
|
},
|
|
{
|
|
"epoch": 0.4232396546386433,
|
|
"grad_norm": 0.3755747079849243,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4633,
|
|
"step": 1538
|
|
},
|
|
{
|
|
"epoch": 0.4237900313026728,
|
|
"grad_norm": 0.359764039516449,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4798,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.4243404079667022,
|
|
"grad_norm": 0.37172380089759827,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4383,
|
|
"step": 1542
|
|
},
|
|
{
|
|
"epoch": 0.42489078463073165,
|
|
"grad_norm": 0.3501332700252533,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.442,
|
|
"step": 1544
|
|
},
|
|
{
|
|
"epoch": 0.4254411612947611,
|
|
"grad_norm": 0.3552211821079254,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4539,
|
|
"step": 1546
|
|
},
|
|
{
|
|
"epoch": 0.4259915379587905,
|
|
"grad_norm": 0.35052230954170227,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.428,
|
|
"step": 1548
|
|
},
|
|
{
|
|
"epoch": 0.42654191462282,
|
|
"grad_norm": 0.3710823357105255,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4297,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.42709229128684945,
|
|
"grad_norm": 0.37135034799575806,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4587,
|
|
"step": 1552
|
|
},
|
|
{
|
|
"epoch": 0.4276426679508789,
|
|
"grad_norm": 0.3729698061943054,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4585,
|
|
"step": 1554
|
|
},
|
|
{
|
|
"epoch": 0.4281930446149083,
|
|
"grad_norm": 0.3525015711784363,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.459,
|
|
"step": 1556
|
|
},
|
|
{
|
|
"epoch": 0.4287434212789378,
|
|
"grad_norm": 0.38500455021858215,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4469,
|
|
"step": 1558
|
|
},
|
|
{
|
|
"epoch": 0.42929379794296724,
|
|
"grad_norm": 0.3852159380912781,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4421,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.42984417460699664,
|
|
"grad_norm": 0.3567640781402588,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4538,
|
|
"step": 1562
|
|
},
|
|
{
|
|
"epoch": 0.4303945512710261,
|
|
"grad_norm": 0.36795344948768616,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4432,
|
|
"step": 1564
|
|
},
|
|
{
|
|
"epoch": 0.43094492793505557,
|
|
"grad_norm": 0.37614256143569946,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4631,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 0.431495304599085,
|
|
"grad_norm": 0.356991171836853,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4389,
|
|
"step": 1568
|
|
},
|
|
{
|
|
"epoch": 0.43204568126311443,
|
|
"grad_norm": 0.3793700933456421,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4609,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.4325960579271439,
|
|
"grad_norm": 0.36675581336021423,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4484,
|
|
"step": 1572
|
|
},
|
|
{
|
|
"epoch": 0.43314643459117336,
|
|
"grad_norm": 0.36404114961624146,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.45,
|
|
"step": 1574
|
|
},
|
|
{
|
|
"epoch": 0.43369681125520276,
|
|
"grad_norm": 0.3868160843849182,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4652,
|
|
"step": 1576
|
|
},
|
|
{
|
|
"epoch": 0.4342471879192322,
|
|
"grad_norm": 0.3898649215698242,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4612,
|
|
"step": 1578
|
|
},
|
|
{
|
|
"epoch": 0.4347975645832617,
|
|
"grad_norm": 0.36762335896492004,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4543,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.4353479412472911,
|
|
"grad_norm": 0.3434213101863861,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4423,
|
|
"step": 1582
|
|
},
|
|
{
|
|
"epoch": 0.43589831791132055,
|
|
"grad_norm": 0.3741122782230377,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4638,
|
|
"step": 1584
|
|
},
|
|
{
|
|
"epoch": 0.43644869457535,
|
|
"grad_norm": 0.38991764187812805,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.438,
|
|
"step": 1586
|
|
},
|
|
{
|
|
"epoch": 0.4369990712393795,
|
|
"grad_norm": 0.35284510254859924,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4559,
|
|
"step": 1588
|
|
},
|
|
{
|
|
"epoch": 0.4375494479034089,
|
|
"grad_norm": 0.36775341629981995,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4594,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.43809982456743835,
|
|
"grad_norm": 0.3677217364311218,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.451,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 0.4386502012314678,
|
|
"grad_norm": 0.35295674204826355,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1594
|
|
},
|
|
{
|
|
"epoch": 0.4392005778954972,
|
|
"grad_norm": 0.3770224452018738,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1596
|
|
},
|
|
{
|
|
"epoch": 0.4397509545595267,
|
|
"grad_norm": 0.3824670612812042,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4633,
|
|
"step": 1598
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"grad_norm": 0.38165828585624695,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4458,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_merge_loss": 0.39449170231819153,
|
|
"eval_merge_runtime": 599.3899,
|
|
"eval_merge_samples_per_second": 56.311,
|
|
"eval_merge_steps_per_second": 2.347,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5198476314544678,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.3548,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 74.072,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.09,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7743993997573853,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.7173,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.675,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.459,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_Table_GPT.json_loss": 0.05817935988306999,
|
|
"eval_Table_GPT.json_runtime": 25.0301,
|
|
"eval_Table_GPT.json_samples_per_second": 83.619,
|
|
"eval_Table_GPT.json_steps_per_second": 3.516,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_gpt_4o_200k.json_loss": 0.8023759126663208,
|
|
"eval_gpt_4o_200k.json_runtime": 48.5498,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.372,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.397,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_multi_turn_datas.json_loss": 0.3328835964202881,
|
|
"eval_multi_turn_datas.json_runtime": 75.669,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.888,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.207,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_table_python_code_datas.json_loss": 0.2713072597980499,
|
|
"eval_table_python_code_datas.json_runtime": 43.1148,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.076,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.087,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_tabular_llm_data.json_loss": 0.10233539342880249,
|
|
"eval_tabular_llm_data.json_runtime": 8.5788,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.675,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.282,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_python_code_critic_21k.json_loss": 0.5702229142189026,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2319,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.719,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.735,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07606548815965652,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.3911,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.439,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.283,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5824379324913025,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.5207,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.68,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.829,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_ultrainteract_sft.json_loss": 0.42119815945625305,
|
|
"eval_ultrainteract_sft.json_runtime": 8.677,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 167.801,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.03,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09474331140518188,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1262,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 269.361,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.845,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6350359320640564,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.869,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.903,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.779,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_all_merge_code.json_loss": 0.2929154634475708,
|
|
"eval_all_merge_code.json_runtime": 0.3373,
|
|
"eval_all_merge_code.json_samples_per_second": 186.752,
|
|
"eval_all_merge_code.json_steps_per_second": 8.893,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_magpie_datas.json_loss": 0.4318141043186188,
|
|
"eval_magpie_datas.json_runtime": 2.2195,
|
|
"eval_magpie_datas.json_samples_per_second": 77.496,
|
|
"eval_magpie_datas.json_steps_per_second": 3.604,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_train_data_for_qwen.json_loss": 0.00419951044023037,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2455,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.733,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.073,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_alpaca_cleaned.json_loss": 0.910367488861084,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1147,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 235.312,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.431,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_agent_instruct.json_loss": 0.21950356662273407,
|
|
"eval_agent_instruct.json_runtime": 0.5156,
|
|
"eval_agent_instruct.json_samples_per_second": 93.094,
|
|
"eval_agent_instruct.json_steps_per_second": 3.879,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_MathInstruct.json_loss": 0.19855839014053345,
|
|
"eval_MathInstruct.json_runtime": 0.3654,
|
|
"eval_MathInstruct.json_samples_per_second": 155.99,
|
|
"eval_MathInstruct.json_steps_per_second": 8.21,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.4433169662952423,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3031,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.164,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.598,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.008965943939983845,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1008,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 228.26,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.924,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.560943603515625,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0505,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 316.662,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.791,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9813264012336731,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.05,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 219.867,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.988,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44030133122355614,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5885769128799438,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4829,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.42,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.071,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.44085170788758554,
|
|
"grad_norm": 0.36969345808029175,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4474,
|
|
"step": 1602
|
|
},
|
|
{
|
|
"epoch": 0.441402084551615,
|
|
"grad_norm": 0.3673281967639923,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4566,
|
|
"step": 1604
|
|
},
|
|
{
|
|
"epoch": 0.44195246121564447,
|
|
"grad_norm": 0.3695686459541321,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4602,
|
|
"step": 1606
|
|
},
|
|
{
|
|
"epoch": 0.4425028378796739,
|
|
"grad_norm": 0.3653704822063446,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 1608
|
|
},
|
|
{
|
|
"epoch": 0.44305321454370333,
|
|
"grad_norm": 0.37890321016311646,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4588,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.4436035912077328,
|
|
"grad_norm": 0.34637650847435,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4554,
|
|
"step": 1612
|
|
},
|
|
{
|
|
"epoch": 0.44415396787176226,
|
|
"grad_norm": 0.3733616769313812,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4477,
|
|
"step": 1614
|
|
},
|
|
{
|
|
"epoch": 0.44470434453579166,
|
|
"grad_norm": 0.3740238547325134,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4528,
|
|
"step": 1616
|
|
},
|
|
{
|
|
"epoch": 0.4452547211998211,
|
|
"grad_norm": 0.35610541701316833,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4487,
|
|
"step": 1618
|
|
},
|
|
{
|
|
"epoch": 0.4458050978638506,
|
|
"grad_norm": 0.362763911485672,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4619,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.44635547452788,
|
|
"grad_norm": 0.3781318962574005,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4481,
|
|
"step": 1622
|
|
},
|
|
{
|
|
"epoch": 0.44690585119190945,
|
|
"grad_norm": 0.40836694836616516,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4597,
|
|
"step": 1624
|
|
},
|
|
{
|
|
"epoch": 0.4474562278559389,
|
|
"grad_norm": 0.3662070035934448,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4466,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 0.4480066045199684,
|
|
"grad_norm": 0.37797635793685913,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4589,
|
|
"step": 1628
|
|
},
|
|
{
|
|
"epoch": 0.4485569811839978,
|
|
"grad_norm": 0.3544275462627411,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4549,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.44910735784802724,
|
|
"grad_norm": 0.36321336030960083,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.443,
|
|
"step": 1632
|
|
},
|
|
{
|
|
"epoch": 0.4496577345120567,
|
|
"grad_norm": 0.45478886365890503,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4343,
|
|
"step": 1634
|
|
},
|
|
{
|
|
"epoch": 0.4502081111760861,
|
|
"grad_norm": 0.3670060336589813,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4463,
|
|
"step": 1636
|
|
},
|
|
{
|
|
"epoch": 0.4507584878401156,
|
|
"grad_norm": 0.381145715713501,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4512,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 0.45130886450414504,
|
|
"grad_norm": 0.3729204833507538,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.451,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.4518592411681745,
|
|
"grad_norm": 0.36986637115478516,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4622,
|
|
"step": 1642
|
|
},
|
|
{
|
|
"epoch": 0.4524096178322039,
|
|
"grad_norm": 0.37230783700942993,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4377,
|
|
"step": 1644
|
|
},
|
|
{
|
|
"epoch": 0.45295999449623336,
|
|
"grad_norm": 0.3671816885471344,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4433,
|
|
"step": 1646
|
|
},
|
|
{
|
|
"epoch": 0.4535103711602628,
|
|
"grad_norm": 0.359372615814209,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4512,
|
|
"step": 1648
|
|
},
|
|
{
|
|
"epoch": 0.45406074782429223,
|
|
"grad_norm": 0.3682217001914978,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4478,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.4546111244883217,
|
|
"grad_norm": 0.3779531419277191,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4446,
|
|
"step": 1652
|
|
},
|
|
{
|
|
"epoch": 0.45516150115235116,
|
|
"grad_norm": 0.3579237759113312,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4432,
|
|
"step": 1654
|
|
},
|
|
{
|
|
"epoch": 0.45571187781638056,
|
|
"grad_norm": 0.35086673498153687,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4511,
|
|
"step": 1656
|
|
},
|
|
{
|
|
"epoch": 0.45626225448041,
|
|
"grad_norm": 0.36263635754585266,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4552,
|
|
"step": 1658
|
|
},
|
|
{
|
|
"epoch": 0.4568126311444395,
|
|
"grad_norm": 0.3715769648551941,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4549,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.45736300780846895,
|
|
"grad_norm": 0.36989322304725647,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4468,
|
|
"step": 1662
|
|
},
|
|
{
|
|
"epoch": 0.45791338447249835,
|
|
"grad_norm": 0.35716795921325684,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1664
|
|
},
|
|
{
|
|
"epoch": 0.4584637611365278,
|
|
"grad_norm": 0.36870133876800537,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4581,
|
|
"step": 1666
|
|
},
|
|
{
|
|
"epoch": 0.4590141378005573,
|
|
"grad_norm": 0.36808547377586365,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4518,
|
|
"step": 1668
|
|
},
|
|
{
|
|
"epoch": 0.4595645144645867,
|
|
"grad_norm": 0.3777028024196625,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4526,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.46011489112861614,
|
|
"grad_norm": 0.3849789798259735,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.452,
|
|
"step": 1672
|
|
},
|
|
{
|
|
"epoch": 0.4606652677926456,
|
|
"grad_norm": 0.38168811798095703,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4408,
|
|
"step": 1674
|
|
},
|
|
{
|
|
"epoch": 0.461215644456675,
|
|
"grad_norm": 0.3601077198982239,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4415,
|
|
"step": 1676
|
|
},
|
|
{
|
|
"epoch": 0.4617660211207045,
|
|
"grad_norm": 0.3658849596977234,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4461,
|
|
"step": 1678
|
|
},
|
|
{
|
|
"epoch": 0.46231639778473393,
|
|
"grad_norm": 0.3822179138660431,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4585,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.4628667744487634,
|
|
"grad_norm": 0.38321495056152344,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4469,
|
|
"step": 1682
|
|
},
|
|
{
|
|
"epoch": 0.4634171511127928,
|
|
"grad_norm": 0.3911297917366028,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4522,
|
|
"step": 1684
|
|
},
|
|
{
|
|
"epoch": 0.46396752777682226,
|
|
"grad_norm": 0.38053110241889954,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4487,
|
|
"step": 1686
|
|
},
|
|
{
|
|
"epoch": 0.4645179044408517,
|
|
"grad_norm": 0.3704802691936493,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4436,
|
|
"step": 1688
|
|
},
|
|
{
|
|
"epoch": 0.46506828110488113,
|
|
"grad_norm": 0.3804566562175751,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4419,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.4656186577689106,
|
|
"grad_norm": 0.3807014524936676,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4526,
|
|
"step": 1692
|
|
},
|
|
{
|
|
"epoch": 0.46616903443294005,
|
|
"grad_norm": 0.3678591549396515,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4579,
|
|
"step": 1694
|
|
},
|
|
{
|
|
"epoch": 0.46671941109696946,
|
|
"grad_norm": 0.37586984038352966,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4404,
|
|
"step": 1696
|
|
},
|
|
{
|
|
"epoch": 0.4672697877609989,
|
|
"grad_norm": 0.36084264516830444,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4398,
|
|
"step": 1698
|
|
},
|
|
{
|
|
"epoch": 0.4678201644250284,
|
|
"grad_norm": 0.36694666743278503,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4369,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.46837054108905785,
|
|
"grad_norm": 0.4061066210269928,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4495,
|
|
"step": 1702
|
|
},
|
|
{
|
|
"epoch": 0.46892091775308725,
|
|
"grad_norm": 0.37329551577568054,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4482,
|
|
"step": 1704
|
|
},
|
|
{
|
|
"epoch": 0.4694712944171167,
|
|
"grad_norm": 0.39072346687316895,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 0.4700216710811462,
|
|
"grad_norm": 0.3565053343772888,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.447,
|
|
"step": 1708
|
|
},
|
|
{
|
|
"epoch": 0.4705720477451756,
|
|
"grad_norm": 0.39754360914230347,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4468,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.47112242440920504,
|
|
"grad_norm": 0.34416159987449646,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4509,
|
|
"step": 1712
|
|
},
|
|
{
|
|
"epoch": 0.4716728010732345,
|
|
"grad_norm": 0.3646188974380493,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4436,
|
|
"step": 1714
|
|
},
|
|
{
|
|
"epoch": 0.47222317773726397,
|
|
"grad_norm": 0.372549831867218,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4622,
|
|
"step": 1716
|
|
},
|
|
{
|
|
"epoch": 0.47277355440129337,
|
|
"grad_norm": 0.34616753458976746,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4513,
|
|
"step": 1718
|
|
},
|
|
{
|
|
"epoch": 0.47332393106532283,
|
|
"grad_norm": 0.39396756887435913,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4464,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.4738743077293523,
|
|
"grad_norm": 0.3681057095527649,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4514,
|
|
"step": 1722
|
|
},
|
|
{
|
|
"epoch": 0.4744246843933817,
|
|
"grad_norm": 0.38942328095436096,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4603,
|
|
"step": 1724
|
|
},
|
|
{
|
|
"epoch": 0.47497506105741116,
|
|
"grad_norm": 0.380278617143631,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4463,
|
|
"step": 1726
|
|
},
|
|
{
|
|
"epoch": 0.4755254377214406,
|
|
"grad_norm": 0.37930282950401306,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4377,
|
|
"step": 1728
|
|
},
|
|
{
|
|
"epoch": 0.47607581438547003,
|
|
"grad_norm": 0.36719146370887756,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4285,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.4766261910494995,
|
|
"grad_norm": 0.3802686035633087,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4346,
|
|
"step": 1732
|
|
},
|
|
{
|
|
"epoch": 0.47717656771352895,
|
|
"grad_norm": 0.3655955493450165,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4504,
|
|
"step": 1734
|
|
},
|
|
{
|
|
"epoch": 0.4777269443775584,
|
|
"grad_norm": 0.34403982758522034,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4502,
|
|
"step": 1736
|
|
},
|
|
{
|
|
"epoch": 0.4782773210415878,
|
|
"grad_norm": 0.35954922437667847,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4313,
|
|
"step": 1738
|
|
},
|
|
{
|
|
"epoch": 0.4788276977056173,
|
|
"grad_norm": 0.3489810824394226,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4479,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.47937807436964675,
|
|
"grad_norm": 0.3789598047733307,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4488,
|
|
"step": 1742
|
|
},
|
|
{
|
|
"epoch": 0.47992845103367615,
|
|
"grad_norm": 0.38226747512817383,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4612,
|
|
"step": 1744
|
|
},
|
|
{
|
|
"epoch": 0.4804788276977056,
|
|
"grad_norm": 0.36648547649383545,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4521,
|
|
"step": 1746
|
|
},
|
|
{
|
|
"epoch": 0.4810292043617351,
|
|
"grad_norm": 0.36434775590896606,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4579,
|
|
"step": 1748
|
|
},
|
|
{
|
|
"epoch": 0.4815795810257645,
|
|
"grad_norm": 0.3805695176124573,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.437,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.48212995768979394,
|
|
"grad_norm": 0.34234747290611267,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4411,
|
|
"step": 1752
|
|
},
|
|
{
|
|
"epoch": 0.4826803343538234,
|
|
"grad_norm": 0.356953501701355,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4563,
|
|
"step": 1754
|
|
},
|
|
{
|
|
"epoch": 0.48323071101785287,
|
|
"grad_norm": 0.35372647643089294,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1756
|
|
},
|
|
{
|
|
"epoch": 0.48378108768188227,
|
|
"grad_norm": 0.3776678442955017,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4517,
|
|
"step": 1758
|
|
},
|
|
{
|
|
"epoch": 0.48433146434591173,
|
|
"grad_norm": 0.336029052734375,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4387,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.4848818410099412,
|
|
"grad_norm": 0.35482755303382874,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4456,
|
|
"step": 1762
|
|
},
|
|
{
|
|
"epoch": 0.4854322176739706,
|
|
"grad_norm": 0.3713533580303192,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4616,
|
|
"step": 1764
|
|
},
|
|
{
|
|
"epoch": 0.48598259433800006,
|
|
"grad_norm": 0.348069965839386,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4504,
|
|
"step": 1766
|
|
},
|
|
{
|
|
"epoch": 0.4865329710020295,
|
|
"grad_norm": 0.36832061409950256,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.45,
|
|
"step": 1768
|
|
},
|
|
{
|
|
"epoch": 0.487083347666059,
|
|
"grad_norm": 0.3665439486503601,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4525,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.4876337243300884,
|
|
"grad_norm": 0.39572247862815857,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4521,
|
|
"step": 1772
|
|
},
|
|
{
|
|
"epoch": 0.48818410099411785,
|
|
"grad_norm": 0.36583212018013,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4298,
|
|
"step": 1774
|
|
},
|
|
{
|
|
"epoch": 0.4887344776581473,
|
|
"grad_norm": 0.35969898104667664,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4497,
|
|
"step": 1776
|
|
},
|
|
{
|
|
"epoch": 0.4892848543221767,
|
|
"grad_norm": 0.3651510775089264,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4355,
|
|
"step": 1778
|
|
},
|
|
{
|
|
"epoch": 0.4898352309862062,
|
|
"grad_norm": 0.3885847330093384,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4633,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.49038560765023564,
|
|
"grad_norm": 0.357166588306427,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4512,
|
|
"step": 1782
|
|
},
|
|
{
|
|
"epoch": 0.49093598431426505,
|
|
"grad_norm": 0.34748879075050354,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.437,
|
|
"step": 1784
|
|
},
|
|
{
|
|
"epoch": 0.4914863609782945,
|
|
"grad_norm": 0.371999055147171,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4493,
|
|
"step": 1786
|
|
},
|
|
{
|
|
"epoch": 0.492036737642324,
|
|
"grad_norm": 0.3602544665336609,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4413,
|
|
"step": 1788
|
|
},
|
|
{
|
|
"epoch": 0.49258711430635344,
|
|
"grad_norm": 0.38811835646629333,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4406,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.49313749097038284,
|
|
"grad_norm": 0.366616427898407,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4587,
|
|
"step": 1792
|
|
},
|
|
{
|
|
"epoch": 0.4936878676344123,
|
|
"grad_norm": 0.39588844776153564,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4525,
|
|
"step": 1794
|
|
},
|
|
{
|
|
"epoch": 0.49423824429844176,
|
|
"grad_norm": 0.3641244173049927,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4533,
|
|
"step": 1796
|
|
},
|
|
{
|
|
"epoch": 0.49478862096247117,
|
|
"grad_norm": 0.35738009214401245,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4542,
|
|
"step": 1798
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"grad_norm": 0.36343181133270264,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4527,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_merge_loss": 0.3900485932826996,
|
|
"eval_merge_runtime": 600.246,
|
|
"eval_merge_samples_per_second": 56.23,
|
|
"eval_merge_steps_per_second": 2.344,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5161438584327698,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.4655,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 73.288,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.058,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7699668407440186,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.6447,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.809,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.465,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_Table_GPT.json_loss": 0.057397227734327316,
|
|
"eval_Table_GPT.json_runtime": 24.974,
|
|
"eval_Table_GPT.json_samples_per_second": 83.807,
|
|
"eval_Table_GPT.json_steps_per_second": 3.524,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_gpt_4o_200k.json_loss": 0.7959992289543152,
|
|
"eval_gpt_4o_200k.json_runtime": 48.4474,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.646,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.408,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_multi_turn_datas.json_loss": 0.326607346534729,
|
|
"eval_multi_turn_datas.json_runtime": 75.6077,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.931,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.209,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_table_python_code_datas.json_loss": 0.26808008551597595,
|
|
"eval_table_python_code_datas.json_runtime": 43.0557,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.144,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.09,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_tabular_llm_data.json_loss": 0.1004142090678215,
|
|
"eval_tabular_llm_data.json_runtime": 8.5429,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.796,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.288,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_python_code_critic_21k.json_loss": 0.5654606223106384,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2351,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.538,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.728,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07576768845319748,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.2598,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.611,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.29,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.579846203327179,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.4188,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.893,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.838,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_ultrainteract_sft.json_loss": 0.4181068241596222,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6461,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 168.4,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.055,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09818249940872192,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1264,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 269.092,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.829,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6291559338569641,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8451,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 40.025,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.785,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_all_merge_code.json_loss": 0.29108163714408875,
|
|
"eval_all_merge_code.json_runtime": 0.3447,
|
|
"eval_all_merge_code.json_samples_per_second": 182.771,
|
|
"eval_all_merge_code.json_steps_per_second": 8.703,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_magpie_datas.json_loss": 0.43020525574684143,
|
|
"eval_magpie_datas.json_runtime": 2.2179,
|
|
"eval_magpie_datas.json_samples_per_second": 77.551,
|
|
"eval_magpie_datas.json_steps_per_second": 3.607,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_train_data_for_qwen.json_loss": 0.0027856978122144938,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2444,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.919,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.092,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_alpaca_cleaned.json_loss": 0.9129724502563477,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1153,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 234.093,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.34,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_agent_instruct.json_loss": 0.22024483978748322,
|
|
"eval_agent_instruct.json_runtime": 0.5149,
|
|
"eval_agent_instruct.json_samples_per_second": 93.222,
|
|
"eval_agent_instruct.json_steps_per_second": 3.884,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_MathInstruct.json_loss": 0.20060402154922485,
|
|
"eval_MathInstruct.json_runtime": 0.3648,
|
|
"eval_MathInstruct.json_samples_per_second": 156.23,
|
|
"eval_MathInstruct.json_steps_per_second": 8.223,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.44536128640174866,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3002,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 113.24,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.661,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.00967579148709774,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1002,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 229.452,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.976,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5544477701187134,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0511,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 313.214,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.576,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.977000892162323,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0508,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 216.666,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.697,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.49533899762650063,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5895399451255798,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4883,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 40.957,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.048,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.4958893742905301,
|
|
"grad_norm": 0.36430442333221436,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4418,
|
|
"step": 1802
|
|
},
|
|
{
|
|
"epoch": 0.4964397509545595,
|
|
"grad_norm": 0.35012543201446533,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4437,
|
|
"step": 1804
|
|
},
|
|
{
|
|
"epoch": 0.49699012761858896,
|
|
"grad_norm": 0.3726542294025421,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4332,
|
|
"step": 1806
|
|
},
|
|
{
|
|
"epoch": 0.4975405042826184,
|
|
"grad_norm": 0.3564360439777374,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4378,
|
|
"step": 1808
|
|
},
|
|
{
|
|
"epoch": 0.4980908809466479,
|
|
"grad_norm": 0.3730456233024597,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.443,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.4986412576106773,
|
|
"grad_norm": 0.3588622212409973,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4387,
|
|
"step": 1812
|
|
},
|
|
{
|
|
"epoch": 0.49919163427470675,
|
|
"grad_norm": 0.36861783266067505,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4392,
|
|
"step": 1814
|
|
},
|
|
{
|
|
"epoch": 0.4997420109387362,
|
|
"grad_norm": 0.3537515699863434,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4331,
|
|
"step": 1816
|
|
},
|
|
{
|
|
"epoch": 0.5002923876027656,
|
|
"grad_norm": 0.3723071813583374,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4429,
|
|
"step": 1818
|
|
},
|
|
{
|
|
"epoch": 0.5008427642667951,
|
|
"grad_norm": 0.37015634775161743,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4687,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.5013931409308245,
|
|
"grad_norm": 0.3528953790664673,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4315,
|
|
"step": 1822
|
|
},
|
|
{
|
|
"epoch": 0.501943517594854,
|
|
"grad_norm": 0.357120543718338,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4423,
|
|
"step": 1824
|
|
},
|
|
{
|
|
"epoch": 0.5024938942588835,
|
|
"grad_norm": 0.3655802607536316,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4475,
|
|
"step": 1826
|
|
},
|
|
{
|
|
"epoch": 0.5030442709229128,
|
|
"grad_norm": 0.3676040470600128,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4345,
|
|
"step": 1828
|
|
},
|
|
{
|
|
"epoch": 0.5035946475869423,
|
|
"grad_norm": 0.3427799940109253,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4422,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.5041450242509717,
|
|
"grad_norm": 0.3482607305049896,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4347,
|
|
"step": 1832
|
|
},
|
|
{
|
|
"epoch": 0.5046954009150012,
|
|
"grad_norm": 0.3690313398838043,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4572,
|
|
"step": 1834
|
|
},
|
|
{
|
|
"epoch": 0.5052457775790307,
|
|
"grad_norm": 0.351601243019104,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4445,
|
|
"step": 1836
|
|
},
|
|
{
|
|
"epoch": 0.5057961542430601,
|
|
"grad_norm": 0.3506658971309662,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4482,
|
|
"step": 1838
|
|
},
|
|
{
|
|
"epoch": 0.5063465309070896,
|
|
"grad_norm": 0.36706456542015076,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4503,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.5068969075711189,
|
|
"grad_norm": 0.36632585525512695,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4385,
|
|
"step": 1842
|
|
},
|
|
{
|
|
"epoch": 0.5074472842351484,
|
|
"grad_norm": 0.3675621747970581,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4391,
|
|
"step": 1844
|
|
},
|
|
{
|
|
"epoch": 0.5079976608991779,
|
|
"grad_norm": 0.3883734941482544,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 1846
|
|
},
|
|
{
|
|
"epoch": 0.5085480375632073,
|
|
"grad_norm": 0.34348422288894653,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4388,
|
|
"step": 1848
|
|
},
|
|
{
|
|
"epoch": 0.5090984142272368,
|
|
"grad_norm": 0.36695536971092224,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4473,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.5096487908912662,
|
|
"grad_norm": 0.36929944157600403,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4542,
|
|
"step": 1852
|
|
},
|
|
{
|
|
"epoch": 0.5101991675552957,
|
|
"grad_norm": 0.3946716785430908,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4399,
|
|
"step": 1854
|
|
},
|
|
{
|
|
"epoch": 0.5107495442193251,
|
|
"grad_norm": 0.3619132936000824,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4471,
|
|
"step": 1856
|
|
},
|
|
{
|
|
"epoch": 0.5112999208833545,
|
|
"grad_norm": 0.34836745262145996,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4392,
|
|
"step": 1858
|
|
},
|
|
{
|
|
"epoch": 0.511850297547384,
|
|
"grad_norm": 0.37516769766807556,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4579,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.5124006742114134,
|
|
"grad_norm": 0.35800984501838684,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4479,
|
|
"step": 1862
|
|
},
|
|
{
|
|
"epoch": 0.5129510508754429,
|
|
"grad_norm": 0.3664796054363251,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4556,
|
|
"step": 1864
|
|
},
|
|
{
|
|
"epoch": 0.5135014275394724,
|
|
"grad_norm": 0.3633113503456116,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4405,
|
|
"step": 1866
|
|
},
|
|
{
|
|
"epoch": 0.5140518042035017,
|
|
"grad_norm": 0.3655359447002411,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4486,
|
|
"step": 1868
|
|
},
|
|
{
|
|
"epoch": 0.5146021808675312,
|
|
"grad_norm": 0.36135318875312805,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4473,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.5151525575315606,
|
|
"grad_norm": 0.4725627601146698,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4579,
|
|
"step": 1872
|
|
},
|
|
{
|
|
"epoch": 0.5157029341955901,
|
|
"grad_norm": 0.37844300270080566,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4502,
|
|
"step": 1874
|
|
},
|
|
{
|
|
"epoch": 0.5162533108596196,
|
|
"grad_norm": 0.35601717233657837,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4392,
|
|
"step": 1876
|
|
},
|
|
{
|
|
"epoch": 0.516803687523649,
|
|
"grad_norm": 0.3960351049900055,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4519,
|
|
"step": 1878
|
|
},
|
|
{
|
|
"epoch": 0.5173540641876785,
|
|
"grad_norm": 0.3775772154331207,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4553,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.5179044408517078,
|
|
"grad_norm": 0.3815532624721527,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4479,
|
|
"step": 1882
|
|
},
|
|
{
|
|
"epoch": 0.5184548175157373,
|
|
"grad_norm": 0.3661166727542877,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4423,
|
|
"step": 1884
|
|
},
|
|
{
|
|
"epoch": 0.5190051941797668,
|
|
"grad_norm": 0.3378327786922455,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4419,
|
|
"step": 1886
|
|
},
|
|
{
|
|
"epoch": 0.5195555708437962,
|
|
"grad_norm": 0.34638261795043945,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4379,
|
|
"step": 1888
|
|
},
|
|
{
|
|
"epoch": 0.5201059475078257,
|
|
"grad_norm": 0.35764721035957336,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4389,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.5206563241718551,
|
|
"grad_norm": 0.3674796223640442,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4438,
|
|
"step": 1892
|
|
},
|
|
{
|
|
"epoch": 0.5212067008358846,
|
|
"grad_norm": 0.34744736552238464,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4317,
|
|
"step": 1894
|
|
},
|
|
{
|
|
"epoch": 0.521757077499914,
|
|
"grad_norm": 0.39198940992355347,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4406,
|
|
"step": 1896
|
|
},
|
|
{
|
|
"epoch": 0.5223074541639434,
|
|
"grad_norm": 0.3545363247394562,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4255,
|
|
"step": 1898
|
|
},
|
|
{
|
|
"epoch": 0.5228578308279729,
|
|
"grad_norm": 0.3635193407535553,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4521,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.5234082074920023,
|
|
"grad_norm": 0.33844560384750366,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4371,
|
|
"step": 1902
|
|
},
|
|
{
|
|
"epoch": 0.5239585841560318,
|
|
"grad_norm": 0.34886521100997925,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4328,
|
|
"step": 1904
|
|
},
|
|
{
|
|
"epoch": 0.5245089608200613,
|
|
"grad_norm": 0.34973517060279846,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4442,
|
|
"step": 1906
|
|
},
|
|
{
|
|
"epoch": 0.5250593374840907,
|
|
"grad_norm": 0.35180777311325073,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4575,
|
|
"step": 1908
|
|
},
|
|
{
|
|
"epoch": 0.5256097141481201,
|
|
"grad_norm": 0.36237335205078125,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4357,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.5261600908121495,
|
|
"grad_norm": 0.3784085512161255,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4559,
|
|
"step": 1912
|
|
},
|
|
{
|
|
"epoch": 0.526710467476179,
|
|
"grad_norm": 0.3556850254535675,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4563,
|
|
"step": 1914
|
|
},
|
|
{
|
|
"epoch": 0.5272608441402085,
|
|
"grad_norm": 0.3620041310787201,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4458,
|
|
"step": 1916
|
|
},
|
|
{
|
|
"epoch": 0.5278112208042379,
|
|
"grad_norm": 0.3616819679737091,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4304,
|
|
"step": 1918
|
|
},
|
|
{
|
|
"epoch": 0.5283615974682674,
|
|
"grad_norm": 0.3651537597179413,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4463,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.5289119741322967,
|
|
"grad_norm": 0.3924584686756134,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4418,
|
|
"step": 1922
|
|
},
|
|
{
|
|
"epoch": 0.5294623507963262,
|
|
"grad_norm": 0.353217214345932,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4437,
|
|
"step": 1924
|
|
},
|
|
{
|
|
"epoch": 0.5300127274603557,
|
|
"grad_norm": 0.3897522985935211,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4549,
|
|
"step": 1926
|
|
},
|
|
{
|
|
"epoch": 0.5305631041243851,
|
|
"grad_norm": 0.36462587118148804,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4247,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 0.5311134807884146,
|
|
"grad_norm": 0.3874776363372803,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4502,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.531663857452444,
|
|
"grad_norm": 0.3533260226249695,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4515,
|
|
"step": 1932
|
|
},
|
|
{
|
|
"epoch": 0.5322142341164735,
|
|
"grad_norm": 0.3668268024921417,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4474,
|
|
"step": 1934
|
|
},
|
|
{
|
|
"epoch": 0.5327646107805029,
|
|
"grad_norm": 0.3501083254814148,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4344,
|
|
"step": 1936
|
|
},
|
|
{
|
|
"epoch": 0.5333149874445323,
|
|
"grad_norm": 0.3565337657928467,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4412,
|
|
"step": 1938
|
|
},
|
|
{
|
|
"epoch": 0.5338653641085618,
|
|
"grad_norm": 0.34048742055892944,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4502,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.5344157407725912,
|
|
"grad_norm": 0.35694393515586853,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4532,
|
|
"step": 1942
|
|
},
|
|
{
|
|
"epoch": 0.5349661174366207,
|
|
"grad_norm": 0.3527338206768036,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4378,
|
|
"step": 1944
|
|
},
|
|
{
|
|
"epoch": 0.5355164941006502,
|
|
"grad_norm": 0.3684084117412567,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4562,
|
|
"step": 1946
|
|
},
|
|
{
|
|
"epoch": 0.5360668707646796,
|
|
"grad_norm": 0.3584345281124115,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4561,
|
|
"step": 1948
|
|
},
|
|
{
|
|
"epoch": 0.536617247428709,
|
|
"grad_norm": 0.35685622692108154,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4532,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.5371676240927384,
|
|
"grad_norm": 0.36560460925102234,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4529,
|
|
"step": 1952
|
|
},
|
|
{
|
|
"epoch": 0.5377180007567679,
|
|
"grad_norm": 0.36613890528678894,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4536,
|
|
"step": 1954
|
|
},
|
|
{
|
|
"epoch": 0.5382683774207974,
|
|
"grad_norm": 0.3513580858707428,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4496,
|
|
"step": 1956
|
|
},
|
|
{
|
|
"epoch": 0.5388187540848268,
|
|
"grad_norm": 0.38372403383255005,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4506,
|
|
"step": 1958
|
|
},
|
|
{
|
|
"epoch": 0.5393691307488563,
|
|
"grad_norm": 0.35690757632255554,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4371,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.5399195074128857,
|
|
"grad_norm": 0.36706483364105225,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4292,
|
|
"step": 1962
|
|
},
|
|
{
|
|
"epoch": 0.5404698840769151,
|
|
"grad_norm": 0.35754841566085815,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4543,
|
|
"step": 1964
|
|
},
|
|
{
|
|
"epoch": 0.5410202607409446,
|
|
"grad_norm": 0.35544702410697937,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4522,
|
|
"step": 1966
|
|
},
|
|
{
|
|
"epoch": 0.541570637404974,
|
|
"grad_norm": 0.3689357042312622,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4447,
|
|
"step": 1968
|
|
},
|
|
{
|
|
"epoch": 0.5421210140690035,
|
|
"grad_norm": 0.35911116003990173,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4253,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.5426713907330329,
|
|
"grad_norm": 0.3458103537559509,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4398,
|
|
"step": 1972
|
|
},
|
|
{
|
|
"epoch": 0.5432217673970624,
|
|
"grad_norm": 0.3606932759284973,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4486,
|
|
"step": 1974
|
|
},
|
|
{
|
|
"epoch": 0.5437721440610918,
|
|
"grad_norm": 0.3759188652038574,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4339,
|
|
"step": 1976
|
|
},
|
|
{
|
|
"epoch": 0.5443225207251212,
|
|
"grad_norm": 0.3803597390651703,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4575,
|
|
"step": 1978
|
|
},
|
|
{
|
|
"epoch": 0.5448728973891507,
|
|
"grad_norm": 0.36220523715019226,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4427,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.5454232740531801,
|
|
"grad_norm": 0.36756813526153564,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4297,
|
|
"step": 1982
|
|
},
|
|
{
|
|
"epoch": 0.5459736507172096,
|
|
"grad_norm": 0.35930246114730835,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4375,
|
|
"step": 1984
|
|
},
|
|
{
|
|
"epoch": 0.5465240273812391,
|
|
"grad_norm": 0.38998985290527344,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4331,
|
|
"step": 1986
|
|
},
|
|
{
|
|
"epoch": 0.5470744040452685,
|
|
"grad_norm": 0.35975074768066406,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4493,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 0.5476247807092979,
|
|
"grad_norm": 0.3618590533733368,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4431,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.5481751573733273,
|
|
"grad_norm": 0.3768090009689331,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4414,
|
|
"step": 1992
|
|
},
|
|
{
|
|
"epoch": 0.5487255340373568,
|
|
"grad_norm": 0.3526524305343628,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4349,
|
|
"step": 1994
|
|
},
|
|
{
|
|
"epoch": 0.5492759107013863,
|
|
"grad_norm": 0.3426629900932312,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4345,
|
|
"step": 1996
|
|
},
|
|
{
|
|
"epoch": 0.5498262873654157,
|
|
"grad_norm": 0.3500785529613495,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4415,
|
|
"step": 1998
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"grad_norm": 0.3602929413318634,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4454,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_merge_loss": 0.3855894207954407,
|
|
"eval_merge_runtime": 600.0048,
|
|
"eval_merge_samples_per_second": 56.253,
|
|
"eval_merge_steps_per_second": 2.345,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5099759697914124,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.3782,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 73.905,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.083,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_sharegpt_gpt4.json_loss": 0.763576865196228,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.7204,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.669,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.459,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_Table_GPT.json_loss": 0.055675260722637177,
|
|
"eval_Table_GPT.json_runtime": 24.9781,
|
|
"eval_Table_GPT.json_samples_per_second": 83.793,
|
|
"eval_Table_GPT.json_steps_per_second": 3.523,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_gpt_4o_200k.json_loss": 0.7919400334358215,
|
|
"eval_gpt_4o_200k.json_runtime": 48.5207,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.45,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.4,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_multi_turn_datas.json_loss": 0.321598082780838,
|
|
"eval_multi_turn_datas.json_runtime": 75.7401,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.839,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.205,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_table_python_code_datas.json_loss": 0.26337531208992004,
|
|
"eval_table_python_code_datas.json_runtime": 43.1695,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.012,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.085,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_tabular_llm_data.json_loss": 0.09393570572137833,
|
|
"eval_tabular_llm_data.json_runtime": 8.5822,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.664,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.282,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_python_code_critic_21k.json_loss": 0.5615730285644531,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2332,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.645,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.732,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07384855300188065,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.3929,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.437,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.282,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5769618153572083,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.4541,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.819,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.835,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_ultrainteract_sft.json_loss": 0.41532665491104126,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6954,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 167.445,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.015,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09223779290914536,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1265,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 268.84,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.814,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6254591941833496,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8542,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.979,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.782,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_all_merge_code.json_loss": 0.2845838665962219,
|
|
"eval_all_merge_code.json_runtime": 0.3345,
|
|
"eval_all_merge_code.json_samples_per_second": 188.319,
|
|
"eval_all_merge_code.json_steps_per_second": 8.968,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_magpie_datas.json_loss": 0.4300972521305084,
|
|
"eval_magpie_datas.json_runtime": 2.2166,
|
|
"eval_magpie_datas.json_samples_per_second": 77.598,
|
|
"eval_magpie_datas.json_steps_per_second": 3.609,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_train_data_for_qwen.json_loss": 0.0036769520957022905,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2431,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 41.14,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.114,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_alpaca_cleaned.json_loss": 0.9104709625244141,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1148,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 235.266,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.427,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_agent_instruct.json_loss": 0.220087930560112,
|
|
"eval_agent_instruct.json_runtime": 0.5143,
|
|
"eval_agent_instruct.json_samples_per_second": 93.334,
|
|
"eval_agent_instruct.json_steps_per_second": 3.889,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_MathInstruct.json_loss": 0.1989249587059021,
|
|
"eval_MathInstruct.json_runtime": 0.3499,
|
|
"eval_MathInstruct.json_samples_per_second": 162.904,
|
|
"eval_MathInstruct.json_steps_per_second": 8.574,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.4425477683544159,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3008,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 113.029,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.649,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.008927595801651478,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1003,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 229.301,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.97,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5485728979110718,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0512,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 312.726,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.545,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9768400192260742,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0505,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 217.931,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.812,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5503766640294452,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.587010383605957,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4876,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.015,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.051,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5509270406934746,
|
|
"grad_norm": 0.3626772463321686,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4442,
|
|
"step": 2002
|
|
},
|
|
{
|
|
"epoch": 0.551477417357504,
|
|
"grad_norm": 0.34878280758857727,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4458,
|
|
"step": 2004
|
|
},
|
|
{
|
|
"epoch": 0.5520277940215335,
|
|
"grad_norm": 0.35377946496009827,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4273,
|
|
"step": 2006
|
|
},
|
|
{
|
|
"epoch": 0.5525781706855629,
|
|
"grad_norm": 0.3649701774120331,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4342,
|
|
"step": 2008
|
|
},
|
|
{
|
|
"epoch": 0.5531285473495924,
|
|
"grad_norm": 0.34736165404319763,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4298,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.5536789240136218,
|
|
"grad_norm": 0.3697884678840637,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4424,
|
|
"step": 2012
|
|
},
|
|
{
|
|
"epoch": 0.5542293006776513,
|
|
"grad_norm": 0.40290403366088867,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4388,
|
|
"step": 2014
|
|
},
|
|
{
|
|
"epoch": 0.5547796773416808,
|
|
"grad_norm": 0.36797061562538147,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4648,
|
|
"step": 2016
|
|
},
|
|
{
|
|
"epoch": 0.5553300540057101,
|
|
"grad_norm": 0.35621124505996704,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.433,
|
|
"step": 2018
|
|
},
|
|
{
|
|
"epoch": 0.5558804306697396,
|
|
"grad_norm": 0.3625437915325165,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.441,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.556430807333769,
|
|
"grad_norm": 0.3642013370990753,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4425,
|
|
"step": 2022
|
|
},
|
|
{
|
|
"epoch": 0.5569811839977985,
|
|
"grad_norm": 0.36053115129470825,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4422,
|
|
"step": 2024
|
|
},
|
|
{
|
|
"epoch": 0.557531560661828,
|
|
"grad_norm": 0.36283549666404724,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4338,
|
|
"step": 2026
|
|
},
|
|
{
|
|
"epoch": 0.5580819373258574,
|
|
"grad_norm": 0.3758421540260315,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.439,
|
|
"step": 2028
|
|
},
|
|
{
|
|
"epoch": 0.5586323139898868,
|
|
"grad_norm": 0.33730989694595337,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4446,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.5591826906539162,
|
|
"grad_norm": 0.36297255754470825,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4358,
|
|
"step": 2032
|
|
},
|
|
{
|
|
"epoch": 0.5597330673179457,
|
|
"grad_norm": 0.3534908890724182,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4257,
|
|
"step": 2034
|
|
},
|
|
{
|
|
"epoch": 0.5602834439819752,
|
|
"grad_norm": 0.3690515160560608,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4383,
|
|
"step": 2036
|
|
},
|
|
{
|
|
"epoch": 0.5608338206460046,
|
|
"grad_norm": 0.3638661503791809,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4452,
|
|
"step": 2038
|
|
},
|
|
{
|
|
"epoch": 0.5613841973100341,
|
|
"grad_norm": 0.3521392047405243,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4342,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.5619345739740635,
|
|
"grad_norm": 0.3569532632827759,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4507,
|
|
"step": 2042
|
|
},
|
|
{
|
|
"epoch": 0.5624849506380929,
|
|
"grad_norm": 0.37072595953941345,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4354,
|
|
"step": 2044
|
|
},
|
|
{
|
|
"epoch": 0.5630353273021224,
|
|
"grad_norm": 0.38489988446235657,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4528,
|
|
"step": 2046
|
|
},
|
|
{
|
|
"epoch": 0.5635857039661518,
|
|
"grad_norm": 0.38305357098579407,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4428,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 0.5641360806301813,
|
|
"grad_norm": 0.3491927981376648,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4242,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.5646864572942107,
|
|
"grad_norm": 0.35508430004119873,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4556,
|
|
"step": 2052
|
|
},
|
|
{
|
|
"epoch": 0.5652368339582402,
|
|
"grad_norm": 0.36298030614852905,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4337,
|
|
"step": 2054
|
|
},
|
|
{
|
|
"epoch": 0.5657872106222697,
|
|
"grad_norm": 0.3598901629447937,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4378,
|
|
"step": 2056
|
|
},
|
|
{
|
|
"epoch": 0.566337587286299,
|
|
"grad_norm": 0.3838946223258972,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4346,
|
|
"step": 2058
|
|
},
|
|
{
|
|
"epoch": 0.5668879639503285,
|
|
"grad_norm": 0.3986867666244507,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.45,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.5674383406143579,
|
|
"grad_norm": 0.3509708344936371,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4462,
|
|
"step": 2062
|
|
},
|
|
{
|
|
"epoch": 0.5679887172783874,
|
|
"grad_norm": 0.35189950466156006,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4307,
|
|
"step": 2064
|
|
},
|
|
{
|
|
"epoch": 0.5685390939424169,
|
|
"grad_norm": 0.37416207790374756,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4368,
|
|
"step": 2066
|
|
},
|
|
{
|
|
"epoch": 0.5690894706064463,
|
|
"grad_norm": 0.3902382254600525,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4278,
|
|
"step": 2068
|
|
},
|
|
{
|
|
"epoch": 0.5696398472704758,
|
|
"grad_norm": 0.384260892868042,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4449,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.5701902239345051,
|
|
"grad_norm": 0.367347776889801,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4397,
|
|
"step": 2072
|
|
},
|
|
{
|
|
"epoch": 0.5707406005985346,
|
|
"grad_norm": 0.35011574625968933,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4375,
|
|
"step": 2074
|
|
},
|
|
{
|
|
"epoch": 0.5712909772625641,
|
|
"grad_norm": 0.3609907329082489,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.446,
|
|
"step": 2076
|
|
},
|
|
{
|
|
"epoch": 0.5718413539265935,
|
|
"grad_norm": 0.3640425205230713,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4453,
|
|
"step": 2078
|
|
},
|
|
{
|
|
"epoch": 0.572391730590623,
|
|
"grad_norm": 0.3464198112487793,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.5729421072546524,
|
|
"grad_norm": 0.3741483688354492,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4515,
|
|
"step": 2082
|
|
},
|
|
{
|
|
"epoch": 0.5734924839186818,
|
|
"grad_norm": 0.37388619780540466,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4632,
|
|
"step": 2084
|
|
},
|
|
{
|
|
"epoch": 0.5740428605827113,
|
|
"grad_norm": 0.37237605452537537,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4425,
|
|
"step": 2086
|
|
},
|
|
{
|
|
"epoch": 0.5745932372467407,
|
|
"grad_norm": 0.35421323776245117,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4474,
|
|
"step": 2088
|
|
},
|
|
{
|
|
"epoch": 0.5751436139107702,
|
|
"grad_norm": 0.33015069365501404,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.43,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 0.5756939905747996,
|
|
"grad_norm": 0.3670506179332733,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4452,
|
|
"step": 2092
|
|
},
|
|
{
|
|
"epoch": 0.5762443672388291,
|
|
"grad_norm": 0.3514888882637024,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4287,
|
|
"step": 2094
|
|
},
|
|
{
|
|
"epoch": 0.5767947439028586,
|
|
"grad_norm": 0.3714512288570404,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4344,
|
|
"step": 2096
|
|
},
|
|
{
|
|
"epoch": 0.5773451205668879,
|
|
"grad_norm": 0.35363397002220154,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4408,
|
|
"step": 2098
|
|
},
|
|
{
|
|
"epoch": 0.5778954972309174,
|
|
"grad_norm": 0.3529844582080841,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4434,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.5784458738949468,
|
|
"grad_norm": 0.3400002121925354,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4443,
|
|
"step": 2102
|
|
},
|
|
{
|
|
"epoch": 0.5789962505589763,
|
|
"grad_norm": 0.3620370328426361,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4377,
|
|
"step": 2104
|
|
},
|
|
{
|
|
"epoch": 0.5795466272230058,
|
|
"grad_norm": 0.3476988971233368,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4321,
|
|
"step": 2106
|
|
},
|
|
{
|
|
"epoch": 0.5800970038870352,
|
|
"grad_norm": 0.35739636421203613,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4495,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 0.5806473805510647,
|
|
"grad_norm": 0.3718028962612152,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4391,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 0.581197757215094,
|
|
"grad_norm": 0.35041627287864685,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.454,
|
|
"step": 2112
|
|
},
|
|
{
|
|
"epoch": 0.5817481338791235,
|
|
"grad_norm": 0.36277493834495544,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.44,
|
|
"step": 2114
|
|
},
|
|
{
|
|
"epoch": 0.582298510543153,
|
|
"grad_norm": 0.36685582995414734,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4401,
|
|
"step": 2116
|
|
},
|
|
{
|
|
"epoch": 0.5828488872071824,
|
|
"grad_norm": 0.33634135127067566,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4338,
|
|
"step": 2118
|
|
},
|
|
{
|
|
"epoch": 0.5833992638712119,
|
|
"grad_norm": 0.36546674370765686,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4456,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 0.5839496405352413,
|
|
"grad_norm": 0.361472487449646,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4368,
|
|
"step": 2122
|
|
},
|
|
{
|
|
"epoch": 0.5845000171992708,
|
|
"grad_norm": 0.36743828654289246,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4464,
|
|
"step": 2124
|
|
},
|
|
{
|
|
"epoch": 0.5850503938633002,
|
|
"grad_norm": 0.35304173827171326,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4407,
|
|
"step": 2126
|
|
},
|
|
{
|
|
"epoch": 0.5856007705273296,
|
|
"grad_norm": 0.35151979327201843,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4532,
|
|
"step": 2128
|
|
},
|
|
{
|
|
"epoch": 0.5861511471913591,
|
|
"grad_norm": 0.34761616587638855,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.444,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 0.5867015238553885,
|
|
"grad_norm": 0.3763500452041626,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4524,
|
|
"step": 2132
|
|
},
|
|
{
|
|
"epoch": 0.587251900519418,
|
|
"grad_norm": 0.36489951610565186,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4333,
|
|
"step": 2134
|
|
},
|
|
{
|
|
"epoch": 0.5878022771834475,
|
|
"grad_norm": 0.38710853457450867,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4517,
|
|
"step": 2136
|
|
},
|
|
{
|
|
"epoch": 0.5883526538474768,
|
|
"grad_norm": 0.36153027415275574,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.438,
|
|
"step": 2138
|
|
},
|
|
{
|
|
"epoch": 0.5889030305115063,
|
|
"grad_norm": 0.3907857835292816,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4429,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 0.5894534071755357,
|
|
"grad_norm": 0.3813617527484894,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4392,
|
|
"step": 2142
|
|
},
|
|
{
|
|
"epoch": 0.5900037838395652,
|
|
"grad_norm": 0.3563400208950043,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.434,
|
|
"step": 2144
|
|
},
|
|
{
|
|
"epoch": 0.5905541605035947,
|
|
"grad_norm": 0.3556332290172577,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4436,
|
|
"step": 2146
|
|
},
|
|
{
|
|
"epoch": 0.5911045371676241,
|
|
"grad_norm": 0.3623802363872528,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4378,
|
|
"step": 2148
|
|
},
|
|
{
|
|
"epoch": 0.5916549138316536,
|
|
"grad_norm": 0.36329442262649536,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4386,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 0.5922052904956829,
|
|
"grad_norm": 0.3771746754646301,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4494,
|
|
"step": 2152
|
|
},
|
|
{
|
|
"epoch": 0.5927556671597124,
|
|
"grad_norm": 0.34596994519233704,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4173,
|
|
"step": 2154
|
|
},
|
|
{
|
|
"epoch": 0.5933060438237419,
|
|
"grad_norm": 0.36507177352905273,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4254,
|
|
"step": 2156
|
|
},
|
|
{
|
|
"epoch": 0.5938564204877713,
|
|
"grad_norm": 0.3519168794155121,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4447,
|
|
"step": 2158
|
|
},
|
|
{
|
|
"epoch": 0.5944067971518008,
|
|
"grad_norm": 0.35316991806030273,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4622,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 0.5949571738158302,
|
|
"grad_norm": 0.3529471158981323,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4482,
|
|
"step": 2162
|
|
},
|
|
{
|
|
"epoch": 0.5955075504798597,
|
|
"grad_norm": 0.3722255825996399,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4454,
|
|
"step": 2164
|
|
},
|
|
{
|
|
"epoch": 0.596057927143889,
|
|
"grad_norm": 0.3557456433773041,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 2166
|
|
},
|
|
{
|
|
"epoch": 0.5966083038079185,
|
|
"grad_norm": 0.3348141610622406,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.436,
|
|
"step": 2168
|
|
},
|
|
{
|
|
"epoch": 0.597158680471948,
|
|
"grad_norm": 0.38193532824516296,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4543,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 0.5977090571359774,
|
|
"grad_norm": 0.3672102391719818,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4356,
|
|
"step": 2172
|
|
},
|
|
{
|
|
"epoch": 0.5982594338000069,
|
|
"grad_norm": 0.37538838386535645,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4442,
|
|
"step": 2174
|
|
},
|
|
{
|
|
"epoch": 0.5988098104640364,
|
|
"grad_norm": 0.3512885272502899,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4249,
|
|
"step": 2176
|
|
},
|
|
{
|
|
"epoch": 0.5993601871280658,
|
|
"grad_norm": 0.4028591811656952,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4495,
|
|
"step": 2178
|
|
},
|
|
{
|
|
"epoch": 0.5999105637920952,
|
|
"grad_norm": 0.3539179861545563,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4504,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 0.6004609404561246,
|
|
"grad_norm": 0.34848934412002563,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4348,
|
|
"step": 2182
|
|
},
|
|
{
|
|
"epoch": 0.6010113171201541,
|
|
"grad_norm": 0.37469926476478577,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4414,
|
|
"step": 2184
|
|
},
|
|
{
|
|
"epoch": 0.6015616937841836,
|
|
"grad_norm": 0.3511207103729248,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 2186
|
|
},
|
|
{
|
|
"epoch": 0.602112070448213,
|
|
"grad_norm": 0.3594874441623688,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4429,
|
|
"step": 2188
|
|
},
|
|
{
|
|
"epoch": 0.6026624471122425,
|
|
"grad_norm": 0.37694159150123596,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4365,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 0.6032128237762718,
|
|
"grad_norm": 0.3630627393722534,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4449,
|
|
"step": 2192
|
|
},
|
|
{
|
|
"epoch": 0.6037632004403013,
|
|
"grad_norm": 0.352230042219162,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4382,
|
|
"step": 2194
|
|
},
|
|
{
|
|
"epoch": 0.6043135771043308,
|
|
"grad_norm": 0.369757741689682,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4443,
|
|
"step": 2196
|
|
},
|
|
{
|
|
"epoch": 0.6048639537683602,
|
|
"grad_norm": 0.37120938301086426,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.454,
|
|
"step": 2198
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"grad_norm": 0.3475727140903473,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4424,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_merge_loss": 0.38126564025878906,
|
|
"eval_merge_runtime": 600.3103,
|
|
"eval_merge_samples_per_second": 56.224,
|
|
"eval_merge_steps_per_second": 2.344,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5048007369041443,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.3514,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 74.096,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.091,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7578977346420288,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.6981,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.71,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.461,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_Table_GPT.json_loss": 0.05305211618542671,
|
|
"eval_Table_GPT.json_runtime": 25.0091,
|
|
"eval_Table_GPT.json_samples_per_second": 83.69,
|
|
"eval_Table_GPT.json_steps_per_second": 3.519,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_gpt_4o_200k.json_loss": 0.7855507135391235,
|
|
"eval_gpt_4o_200k.json_runtime": 48.5546,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.36,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.396,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_multi_turn_datas.json_loss": 0.3139781355857849,
|
|
"eval_multi_turn_datas.json_runtime": 75.6414,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.908,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.208,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_table_python_code_datas.json_loss": 0.2603669762611389,
|
|
"eval_table_python_code_datas.json_runtime": 43.0857,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.109,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.089,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_tabular_llm_data.json_loss": 0.0890057235956192,
|
|
"eval_tabular_llm_data.json_runtime": 8.5461,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.785,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.287,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_python_code_critic_21k.json_loss": 0.5582770705223083,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2316,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.737,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.736,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07120716571807861,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.3637,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.475,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.284,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5745006799697876,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.5197,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.682,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.829,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_ultrainteract_sft.json_loss": 0.41318273544311523,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6602,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 168.125,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.044,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09635543823242188,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1265,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 268.832,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.814,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6216484904289246,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8599,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.949,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.781,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_all_merge_code.json_loss": 0.2849319279193878,
|
|
"eval_all_merge_code.json_runtime": 0.3296,
|
|
"eval_all_merge_code.json_samples_per_second": 191.112,
|
|
"eval_all_merge_code.json_steps_per_second": 9.101,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_magpie_datas.json_loss": 0.4269045293331146,
|
|
"eval_magpie_datas.json_runtime": 2.2161,
|
|
"eval_magpie_datas.json_samples_per_second": 77.615,
|
|
"eval_magpie_datas.json_steps_per_second": 3.61,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_train_data_for_qwen.json_loss": 0.005929525941610336,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2454,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.757,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.076,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_alpaca_cleaned.json_loss": 0.9076781272888184,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1144,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 236.011,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.482,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_agent_instruct.json_loss": 0.2231922596693039,
|
|
"eval_agent_instruct.json_runtime": 0.5154,
|
|
"eval_agent_instruct.json_samples_per_second": 93.136,
|
|
"eval_agent_instruct.json_steps_per_second": 3.881,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_MathInstruct.json_loss": 0.19876058399677277,
|
|
"eval_MathInstruct.json_runtime": 0.3563,
|
|
"eval_MathInstruct.json_samples_per_second": 159.969,
|
|
"eval_MathInstruct.json_steps_per_second": 8.419,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.4431252181529999,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3026,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.374,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.61,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.00838847178965807,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 230.081,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 10.004,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5384413003921509,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0514,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 311.198,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.45,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.969275712966919,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0504,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 218.311,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.846,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6054143304323897,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5901365876197815,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4874,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.032,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 0.6059647070964191,
|
|
"grad_norm": 0.37194857001304626,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.424,
|
|
"step": 2202
|
|
},
|
|
{
|
|
"epoch": 0.6065150837604486,
|
|
"grad_norm": 0.36095818877220154,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4344,
|
|
"step": 2204
|
|
},
|
|
{
|
|
"epoch": 0.607065460424478,
|
|
"grad_norm": 0.36337706446647644,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4446,
|
|
"step": 2206
|
|
},
|
|
{
|
|
"epoch": 0.6076158370885074,
|
|
"grad_norm": 0.3500390946865082,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4304,
|
|
"step": 2208
|
|
},
|
|
{
|
|
"epoch": 0.6081662137525369,
|
|
"grad_norm": 0.3477112054824829,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4346,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 0.6087165904165663,
|
|
"grad_norm": 0.36322692036628723,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4311,
|
|
"step": 2212
|
|
},
|
|
{
|
|
"epoch": 0.6092669670805958,
|
|
"grad_norm": 0.37783941626548767,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4389,
|
|
"step": 2214
|
|
},
|
|
{
|
|
"epoch": 0.6098173437446253,
|
|
"grad_norm": 0.36018887162208557,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4321,
|
|
"step": 2216
|
|
},
|
|
{
|
|
"epoch": 0.6103677204086547,
|
|
"grad_norm": 0.34396857023239136,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4349,
|
|
"step": 2218
|
|
},
|
|
{
|
|
"epoch": 0.6109180970726841,
|
|
"grad_norm": 0.3611605167388916,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4305,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 0.6114684737367135,
|
|
"grad_norm": 0.339339941740036,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4338,
|
|
"step": 2222
|
|
},
|
|
{
|
|
"epoch": 0.612018850400743,
|
|
"grad_norm": 0.32705169916152954,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4275,
|
|
"step": 2224
|
|
},
|
|
{
|
|
"epoch": 0.6125692270647725,
|
|
"grad_norm": 0.3551005721092224,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4365,
|
|
"step": 2226
|
|
},
|
|
{
|
|
"epoch": 0.6131196037288019,
|
|
"grad_norm": 0.3826168179512024,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4325,
|
|
"step": 2228
|
|
},
|
|
{
|
|
"epoch": 0.6136699803928314,
|
|
"grad_norm": 0.376407653093338,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4325,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 0.6142203570568608,
|
|
"grad_norm": 0.3507418930530548,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4315,
|
|
"step": 2232
|
|
},
|
|
{
|
|
"epoch": 0.6147707337208902,
|
|
"grad_norm": 0.3515014946460724,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4432,
|
|
"step": 2234
|
|
},
|
|
{
|
|
"epoch": 0.6153211103849197,
|
|
"grad_norm": 0.37726324796676636,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4389,
|
|
"step": 2236
|
|
},
|
|
{
|
|
"epoch": 0.6158714870489491,
|
|
"grad_norm": 0.35043272376060486,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4406,
|
|
"step": 2238
|
|
},
|
|
{
|
|
"epoch": 0.6164218637129786,
|
|
"grad_norm": 0.3619838356971741,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4381,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 0.616972240377008,
|
|
"grad_norm": 0.3727911114692688,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4261,
|
|
"step": 2242
|
|
},
|
|
{
|
|
"epoch": 0.6175226170410375,
|
|
"grad_norm": 0.35618454217910767,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4353,
|
|
"step": 2244
|
|
},
|
|
{
|
|
"epoch": 0.6180729937050669,
|
|
"grad_norm": 0.3659394681453705,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4281,
|
|
"step": 2246
|
|
},
|
|
{
|
|
"epoch": 0.6186233703690963,
|
|
"grad_norm": 0.35864701867103577,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4409,
|
|
"step": 2248
|
|
},
|
|
{
|
|
"epoch": 0.6191737470331258,
|
|
"grad_norm": 0.36990123987197876,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4424,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 0.6197241236971552,
|
|
"grad_norm": 0.36422237753868103,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4516,
|
|
"step": 2252
|
|
},
|
|
{
|
|
"epoch": 0.6202745003611847,
|
|
"grad_norm": 0.34886521100997925,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4299,
|
|
"step": 2254
|
|
},
|
|
{
|
|
"epoch": 0.6208248770252142,
|
|
"grad_norm": 0.3683704137802124,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4379,
|
|
"step": 2256
|
|
},
|
|
{
|
|
"epoch": 0.6213752536892436,
|
|
"grad_norm": 0.3535701334476471,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4292,
|
|
"step": 2258
|
|
},
|
|
{
|
|
"epoch": 0.621925630353273,
|
|
"grad_norm": 0.370959997177124,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4425,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 0.6224760070173024,
|
|
"grad_norm": 0.3473008871078491,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4289,
|
|
"step": 2262
|
|
},
|
|
{
|
|
"epoch": 0.6230263836813319,
|
|
"grad_norm": 0.36245644092559814,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4525,
|
|
"step": 2264
|
|
},
|
|
{
|
|
"epoch": 0.6235767603453614,
|
|
"grad_norm": 0.37182751297950745,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4438,
|
|
"step": 2266
|
|
},
|
|
{
|
|
"epoch": 0.6241271370093908,
|
|
"grad_norm": 0.35843655467033386,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4403,
|
|
"step": 2268
|
|
},
|
|
{
|
|
"epoch": 0.6246775136734203,
|
|
"grad_norm": 0.3484828472137451,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.429,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 0.6252278903374497,
|
|
"grad_norm": 0.35097572207450867,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 2272
|
|
},
|
|
{
|
|
"epoch": 0.6257782670014791,
|
|
"grad_norm": 0.35911381244659424,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.435,
|
|
"step": 2274
|
|
},
|
|
{
|
|
"epoch": 0.6263286436655086,
|
|
"grad_norm": 0.3544057309627533,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4359,
|
|
"step": 2276
|
|
},
|
|
{
|
|
"epoch": 0.626879020329538,
|
|
"grad_norm": 0.34516793489456177,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4261,
|
|
"step": 2278
|
|
},
|
|
{
|
|
"epoch": 0.6274293969935675,
|
|
"grad_norm": 0.3534994423389435,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4539,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 0.6279797736575969,
|
|
"grad_norm": 0.356238454580307,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4321,
|
|
"step": 2282
|
|
},
|
|
{
|
|
"epoch": 0.6285301503216264,
|
|
"grad_norm": 0.37285274267196655,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4515,
|
|
"step": 2284
|
|
},
|
|
{
|
|
"epoch": 0.6290805269856559,
|
|
"grad_norm": 0.3517172336578369,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4268,
|
|
"step": 2286
|
|
},
|
|
{
|
|
"epoch": 0.6296309036496852,
|
|
"grad_norm": 0.35732871294021606,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4363,
|
|
"step": 2288
|
|
},
|
|
{
|
|
"epoch": 0.6301812803137147,
|
|
"grad_norm": 0.3592797815799713,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4424,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 0.6307316569777441,
|
|
"grad_norm": 0.3233913481235504,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.421,
|
|
"step": 2292
|
|
},
|
|
{
|
|
"epoch": 0.6312820336417736,
|
|
"grad_norm": 0.361591100692749,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4299,
|
|
"step": 2294
|
|
},
|
|
{
|
|
"epoch": 0.6318324103058031,
|
|
"grad_norm": 0.3468184173107147,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4442,
|
|
"step": 2296
|
|
},
|
|
{
|
|
"epoch": 0.6323827869698325,
|
|
"grad_norm": 0.4019412398338318,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4453,
|
|
"step": 2298
|
|
},
|
|
{
|
|
"epoch": 0.6329331636338619,
|
|
"grad_norm": 0.3713074326515198,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.435,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 0.6334835402978913,
|
|
"grad_norm": 0.35839253664016724,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4449,
|
|
"step": 2302
|
|
},
|
|
{
|
|
"epoch": 0.6340339169619208,
|
|
"grad_norm": 0.33958542346954346,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4433,
|
|
"step": 2304
|
|
},
|
|
{
|
|
"epoch": 0.6345842936259503,
|
|
"grad_norm": 0.3750527501106262,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4297,
|
|
"step": 2306
|
|
},
|
|
{
|
|
"epoch": 0.6351346702899797,
|
|
"grad_norm": 0.35579168796539307,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4307,
|
|
"step": 2308
|
|
},
|
|
{
|
|
"epoch": 0.6356850469540092,
|
|
"grad_norm": 0.3424528241157532,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4451,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 0.6362354236180386,
|
|
"grad_norm": 0.3364480137825012,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4251,
|
|
"step": 2312
|
|
},
|
|
{
|
|
"epoch": 0.636785800282068,
|
|
"grad_norm": 0.35307276248931885,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4221,
|
|
"step": 2314
|
|
},
|
|
{
|
|
"epoch": 0.6373361769460975,
|
|
"grad_norm": 0.41354474425315857,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4462,
|
|
"step": 2316
|
|
},
|
|
{
|
|
"epoch": 0.6378865536101269,
|
|
"grad_norm": 0.37485471367836,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4337,
|
|
"step": 2318
|
|
},
|
|
{
|
|
"epoch": 0.6384369302741564,
|
|
"grad_norm": 0.344091534614563,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.43,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 0.6389873069381858,
|
|
"grad_norm": 0.3772261440753937,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4444,
|
|
"step": 2322
|
|
},
|
|
{
|
|
"epoch": 0.6395376836022153,
|
|
"grad_norm": 0.35307928919792175,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4332,
|
|
"step": 2324
|
|
},
|
|
{
|
|
"epoch": 0.6400880602662448,
|
|
"grad_norm": 0.35815975069999695,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 2326
|
|
},
|
|
{
|
|
"epoch": 0.6406384369302741,
|
|
"grad_norm": 0.3731154799461365,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4355,
|
|
"step": 2328
|
|
},
|
|
{
|
|
"epoch": 0.6411888135943036,
|
|
"grad_norm": 0.36875462532043457,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4339,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 0.641739190258333,
|
|
"grad_norm": 0.36913126707077026,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4336,
|
|
"step": 2332
|
|
},
|
|
{
|
|
"epoch": 0.6422895669223625,
|
|
"grad_norm": 0.35829678177833557,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4438,
|
|
"step": 2334
|
|
},
|
|
{
|
|
"epoch": 0.642839943586392,
|
|
"grad_norm": 0.36390239000320435,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4405,
|
|
"step": 2336
|
|
},
|
|
{
|
|
"epoch": 0.6433903202504214,
|
|
"grad_norm": 0.34786713123321533,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.451,
|
|
"step": 2338
|
|
},
|
|
{
|
|
"epoch": 0.6439406969144508,
|
|
"grad_norm": 0.3522484600543976,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4395,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 0.6444910735784802,
|
|
"grad_norm": 0.36442965269088745,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4204,
|
|
"step": 2342
|
|
},
|
|
{
|
|
"epoch": 0.6450414502425097,
|
|
"grad_norm": 0.3635409474372864,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4507,
|
|
"step": 2344
|
|
},
|
|
{
|
|
"epoch": 0.6455918269065392,
|
|
"grad_norm": 0.35682952404022217,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4333,
|
|
"step": 2346
|
|
},
|
|
{
|
|
"epoch": 0.6461422035705686,
|
|
"grad_norm": 0.38101914525032043,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4409,
|
|
"step": 2348
|
|
},
|
|
{
|
|
"epoch": 0.6466925802345981,
|
|
"grad_norm": 0.37273916602134705,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4386,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 0.6472429568986275,
|
|
"grad_norm": 0.37394535541534424,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4426,
|
|
"step": 2352
|
|
},
|
|
{
|
|
"epoch": 0.6477933335626569,
|
|
"grad_norm": 0.3374865651130676,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.443,
|
|
"step": 2354
|
|
},
|
|
{
|
|
"epoch": 0.6483437102266864,
|
|
"grad_norm": 0.34875357151031494,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4135,
|
|
"step": 2356
|
|
},
|
|
{
|
|
"epoch": 0.6488940868907158,
|
|
"grad_norm": 0.365508109331131,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4455,
|
|
"step": 2358
|
|
},
|
|
{
|
|
"epoch": 0.6494444635547453,
|
|
"grad_norm": 0.36924096941947937,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4327,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 0.6499948402187747,
|
|
"grad_norm": 0.3646699786186218,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4324,
|
|
"step": 2362
|
|
},
|
|
{
|
|
"epoch": 0.6505452168828042,
|
|
"grad_norm": 0.34241992235183716,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4414,
|
|
"step": 2364
|
|
},
|
|
{
|
|
"epoch": 0.6510955935468337,
|
|
"grad_norm": 0.3360735774040222,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4228,
|
|
"step": 2366
|
|
},
|
|
{
|
|
"epoch": 0.651645970210863,
|
|
"grad_norm": 0.3782423138618469,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4366,
|
|
"step": 2368
|
|
},
|
|
{
|
|
"epoch": 0.6521963468748925,
|
|
"grad_norm": 0.3839074373245239,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4389,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 0.6527467235389219,
|
|
"grad_norm": 0.3636200726032257,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4418,
|
|
"step": 2372
|
|
},
|
|
{
|
|
"epoch": 0.6532971002029514,
|
|
"grad_norm": 0.3629804253578186,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4259,
|
|
"step": 2374
|
|
},
|
|
{
|
|
"epoch": 0.6538474768669809,
|
|
"grad_norm": 0.3819858133792877,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4348,
|
|
"step": 2376
|
|
},
|
|
{
|
|
"epoch": 0.6543978535310103,
|
|
"grad_norm": 0.3597410321235657,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.428,
|
|
"step": 2378
|
|
},
|
|
{
|
|
"epoch": 0.6549482301950398,
|
|
"grad_norm": 0.4084703326225281,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4478,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 0.6554986068590691,
|
|
"grad_norm": 0.35995879769325256,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4356,
|
|
"step": 2382
|
|
},
|
|
{
|
|
"epoch": 0.6560489835230986,
|
|
"grad_norm": 0.36047980189323425,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4479,
|
|
"step": 2384
|
|
},
|
|
{
|
|
"epoch": 0.6565993601871281,
|
|
"grad_norm": 0.3532986342906952,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.424,
|
|
"step": 2386
|
|
},
|
|
{
|
|
"epoch": 0.6571497368511575,
|
|
"grad_norm": 0.3374999761581421,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4338,
|
|
"step": 2388
|
|
},
|
|
{
|
|
"epoch": 0.657700113515187,
|
|
"grad_norm": 0.34645605087280273,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4257,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 0.6582504901792164,
|
|
"grad_norm": 0.36470580101013184,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4414,
|
|
"step": 2392
|
|
},
|
|
{
|
|
"epoch": 0.6588008668432458,
|
|
"grad_norm": 0.3823862075805664,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4306,
|
|
"step": 2394
|
|
},
|
|
{
|
|
"epoch": 0.6593512435072753,
|
|
"grad_norm": 0.4070727229118347,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4322,
|
|
"step": 2396
|
|
},
|
|
{
|
|
"epoch": 0.6599016201713047,
|
|
"grad_norm": 0.37519609928131104,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4248,
|
|
"step": 2398
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"grad_norm": 0.35447025299072266,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4283,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_merge_loss": 0.37715020775794983,
|
|
"eval_merge_runtime": 600.5757,
|
|
"eval_merge_samples_per_second": 56.199,
|
|
"eval_merge_steps_per_second": 2.343,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.5012194514274597,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.4212,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 73.6,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.071,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_sharegpt_gpt4.json_loss": 0.7534219026565552,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.7308,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.65,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.458,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_Table_GPT.json_loss": 0.050881169736385345,
|
|
"eval_Table_GPT.json_runtime": 24.9922,
|
|
"eval_Table_GPT.json_samples_per_second": 83.746,
|
|
"eval_Table_GPT.json_steps_per_second": 3.521,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_gpt_4o_200k.json_loss": 0.7805712223052979,
|
|
"eval_gpt_4o_200k.json_runtime": 48.518,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.457,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.4,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_multi_turn_datas.json_loss": 0.3069368898868561,
|
|
"eval_multi_turn_datas.json_runtime": 75.8513,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.761,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.202,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_table_python_code_datas.json_loss": 0.2562294006347656,
|
|
"eval_table_python_code_datas.json_runtime": 43.1545,
|
|
"eval_table_python_code_datas.json_samples_per_second": 50.03,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.086,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_tabular_llm_data.json_loss": 0.09128429740667343,
|
|
"eval_tabular_llm_data.json_runtime": 8.5524,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.764,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.286,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_python_code_critic_21k.json_loss": 0.5555644631385803,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2271,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.994,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.747,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07006299495697021,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.358,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.482,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.284,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5720005035400391,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.5016,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.72,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.831,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_ultrainteract_sft.json_loss": 0.4097177982330322,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6753,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 167.832,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.031,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09309177845716476,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1257,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 270.423,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.907,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6212250590324402,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.859,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.954,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.781,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_all_merge_code.json_loss": 0.28449881076812744,
|
|
"eval_all_merge_code.json_runtime": 0.3298,
|
|
"eval_all_merge_code.json_samples_per_second": 191.001,
|
|
"eval_all_merge_code.json_steps_per_second": 9.095,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_magpie_datas.json_loss": 0.426034539937973,
|
|
"eval_magpie_datas.json_runtime": 2.2154,
|
|
"eval_magpie_datas.json_samples_per_second": 77.638,
|
|
"eval_magpie_datas.json_steps_per_second": 3.611,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_train_data_for_qwen.json_loss": 0.005596214439719915,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2424,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 41.251,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.125,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_alpaca_cleaned.json_loss": 0.9008170962333679,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1147,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 235.421,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.439,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_agent_instruct.json_loss": 0.21443764865398407,
|
|
"eval_agent_instruct.json_runtime": 0.5141,
|
|
"eval_agent_instruct.json_samples_per_second": 93.36,
|
|
"eval_agent_instruct.json_steps_per_second": 3.89,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_MathInstruct.json_loss": 0.1956825852394104,
|
|
"eval_MathInstruct.json_runtime": 0.3499,
|
|
"eval_MathInstruct.json_samples_per_second": 162.885,
|
|
"eval_MathInstruct.json_steps_per_second": 8.573,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.4434005320072174,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3023,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.46,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.615,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.009229443967342377,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1004,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 229.185,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.965,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5269618034362793,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.215,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.388,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9699357151985168,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0505,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 217.964,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.815,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6604519968353342,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5749525427818298,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4875,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.023,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.051,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 0.6610023734993636,
|
|
"grad_norm": 0.38521307706832886,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4408,
|
|
"step": 2402
|
|
},
|
|
{
|
|
"epoch": 0.6615527501633931,
|
|
"grad_norm": 0.35963118076324463,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4252,
|
|
"step": 2404
|
|
},
|
|
{
|
|
"epoch": 0.6621031268274226,
|
|
"grad_norm": 0.34755435585975647,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4319,
|
|
"step": 2406
|
|
},
|
|
{
|
|
"epoch": 0.6626535034914519,
|
|
"grad_norm": 0.37133127450942993,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4237,
|
|
"step": 2408
|
|
},
|
|
{
|
|
"epoch": 0.6632038801554814,
|
|
"grad_norm": 0.35870301723480225,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4388,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 0.6637542568195108,
|
|
"grad_norm": 0.357415109872818,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4322,
|
|
"step": 2412
|
|
},
|
|
{
|
|
"epoch": 0.6643046334835403,
|
|
"grad_norm": 0.3610486090183258,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.434,
|
|
"step": 2414
|
|
},
|
|
{
|
|
"epoch": 0.6648550101475698,
|
|
"grad_norm": 0.35058531165122986,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4325,
|
|
"step": 2416
|
|
},
|
|
{
|
|
"epoch": 0.6654053868115992,
|
|
"grad_norm": 0.3732353448867798,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4266,
|
|
"step": 2418
|
|
},
|
|
{
|
|
"epoch": 0.6659557634756287,
|
|
"grad_norm": 0.3728616535663605,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4373,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 0.666506140139658,
|
|
"grad_norm": 0.3697822093963623,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4263,
|
|
"step": 2422
|
|
},
|
|
{
|
|
"epoch": 0.6670565168036875,
|
|
"grad_norm": 0.34242671728134155,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4234,
|
|
"step": 2424
|
|
},
|
|
{
|
|
"epoch": 0.667606893467717,
|
|
"grad_norm": 0.34660401940345764,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4438,
|
|
"step": 2426
|
|
},
|
|
{
|
|
"epoch": 0.6681572701317464,
|
|
"grad_norm": 0.36335524916648865,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.447,
|
|
"step": 2428
|
|
},
|
|
{
|
|
"epoch": 0.6687076467957759,
|
|
"grad_norm": 0.39879950881004333,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4328,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 0.6692580234598053,
|
|
"grad_norm": 0.3318917453289032,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.418,
|
|
"step": 2432
|
|
},
|
|
{
|
|
"epoch": 0.6698084001238348,
|
|
"grad_norm": 0.3548910319805145,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4297,
|
|
"step": 2434
|
|
},
|
|
{
|
|
"epoch": 0.6703587767878642,
|
|
"grad_norm": 0.35431650280952454,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4442,
|
|
"step": 2436
|
|
},
|
|
{
|
|
"epoch": 0.6709091534518936,
|
|
"grad_norm": 0.3501831889152527,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4231,
|
|
"step": 2438
|
|
},
|
|
{
|
|
"epoch": 0.6714595301159231,
|
|
"grad_norm": 0.3664182424545288,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4307,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 0.6720099067799525,
|
|
"grad_norm": 0.36051392555236816,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4348,
|
|
"step": 2442
|
|
},
|
|
{
|
|
"epoch": 0.672560283443982,
|
|
"grad_norm": 0.38968268036842346,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.44,
|
|
"step": 2444
|
|
},
|
|
{
|
|
"epoch": 0.6731106601080115,
|
|
"grad_norm": 0.34485840797424316,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4387,
|
|
"step": 2446
|
|
},
|
|
{
|
|
"epoch": 0.6736610367720408,
|
|
"grad_norm": 0.36389604210853577,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4279,
|
|
"step": 2448
|
|
},
|
|
{
|
|
"epoch": 0.6742114134360703,
|
|
"grad_norm": 0.3703545331954956,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4498,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 0.6747617901000997,
|
|
"grad_norm": 0.34628036618232727,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4145,
|
|
"step": 2452
|
|
},
|
|
{
|
|
"epoch": 0.6753121667641292,
|
|
"grad_norm": 0.3569451570510864,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4308,
|
|
"step": 2454
|
|
},
|
|
{
|
|
"epoch": 0.6758625434281587,
|
|
"grad_norm": 0.3471825122833252,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4299,
|
|
"step": 2456
|
|
},
|
|
{
|
|
"epoch": 0.6764129200921881,
|
|
"grad_norm": 0.37446585297584534,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4417,
|
|
"step": 2458
|
|
},
|
|
{
|
|
"epoch": 0.6769632967562176,
|
|
"grad_norm": 0.355708509683609,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4306,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 0.6775136734202469,
|
|
"grad_norm": 0.36398351192474365,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4331,
|
|
"step": 2462
|
|
},
|
|
{
|
|
"epoch": 0.6780640500842764,
|
|
"grad_norm": 0.38390782475471497,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4421,
|
|
"step": 2464
|
|
},
|
|
{
|
|
"epoch": 0.6786144267483059,
|
|
"grad_norm": 0.3586190938949585,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4365,
|
|
"step": 2466
|
|
},
|
|
{
|
|
"epoch": 0.6791648034123353,
|
|
"grad_norm": 0.33874934911727905,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4346,
|
|
"step": 2468
|
|
},
|
|
{
|
|
"epoch": 0.6797151800763648,
|
|
"grad_norm": 0.3699466586112976,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4282,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 0.6802655567403942,
|
|
"grad_norm": 0.35685962438583374,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4386,
|
|
"step": 2472
|
|
},
|
|
{
|
|
"epoch": 0.6808159334044237,
|
|
"grad_norm": 0.36509183049201965,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4234,
|
|
"step": 2474
|
|
},
|
|
{
|
|
"epoch": 0.681366310068453,
|
|
"grad_norm": 0.3677407503128052,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4327,
|
|
"step": 2476
|
|
},
|
|
{
|
|
"epoch": 0.6819166867324825,
|
|
"grad_norm": 0.361396849155426,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4282,
|
|
"step": 2478
|
|
},
|
|
{
|
|
"epoch": 0.682467063396512,
|
|
"grad_norm": 0.3637540936470032,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4304,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 0.6830174400605414,
|
|
"grad_norm": 0.38396722078323364,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4326,
|
|
"step": 2482
|
|
},
|
|
{
|
|
"epoch": 0.6835678167245709,
|
|
"grad_norm": 0.3760308623313904,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4288,
|
|
"step": 2484
|
|
},
|
|
{
|
|
"epoch": 0.6841181933886004,
|
|
"grad_norm": 0.36777281761169434,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4435,
|
|
"step": 2486
|
|
},
|
|
{
|
|
"epoch": 0.6846685700526298,
|
|
"grad_norm": 0.36967626214027405,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4247,
|
|
"step": 2488
|
|
},
|
|
{
|
|
"epoch": 0.6852189467166592,
|
|
"grad_norm": 0.37309199571609497,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4514,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 0.6857693233806886,
|
|
"grad_norm": 0.35478582978248596,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.436,
|
|
"step": 2492
|
|
},
|
|
{
|
|
"epoch": 0.6863197000447181,
|
|
"grad_norm": 0.35142141580581665,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4289,
|
|
"step": 2494
|
|
},
|
|
{
|
|
"epoch": 0.6868700767087476,
|
|
"grad_norm": 0.37468215823173523,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4363,
|
|
"step": 2496
|
|
},
|
|
{
|
|
"epoch": 0.687420453372777,
|
|
"grad_norm": 0.3481496572494507,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.441,
|
|
"step": 2498
|
|
},
|
|
{
|
|
"epoch": 0.6879708300368065,
|
|
"grad_norm": 0.34628838300704956,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4425,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 0.6885212067008358,
|
|
"grad_norm": 0.3759724497795105,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4322,
|
|
"step": 2502
|
|
},
|
|
{
|
|
"epoch": 0.6890715833648653,
|
|
"grad_norm": 0.37153902649879456,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4412,
|
|
"step": 2504
|
|
},
|
|
{
|
|
"epoch": 0.6896219600288948,
|
|
"grad_norm": 0.3601967990398407,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4314,
|
|
"step": 2506
|
|
},
|
|
{
|
|
"epoch": 0.6901723366929242,
|
|
"grad_norm": 0.3510344326496124,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4261,
|
|
"step": 2508
|
|
},
|
|
{
|
|
"epoch": 0.6907227133569537,
|
|
"grad_norm": 0.34007585048675537,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4272,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 0.6912730900209831,
|
|
"grad_norm": 0.34424078464508057,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4253,
|
|
"step": 2512
|
|
},
|
|
{
|
|
"epoch": 0.6918234666850126,
|
|
"grad_norm": 0.36498820781707764,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.434,
|
|
"step": 2514
|
|
},
|
|
{
|
|
"epoch": 0.692373843349042,
|
|
"grad_norm": 0.3697148859500885,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4358,
|
|
"step": 2516
|
|
},
|
|
{
|
|
"epoch": 0.6929242200130714,
|
|
"grad_norm": 0.36114463210105896,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4177,
|
|
"step": 2518
|
|
},
|
|
{
|
|
"epoch": 0.6934745966771009,
|
|
"grad_norm": 0.3630925714969635,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4438,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 0.6940249733411303,
|
|
"grad_norm": 0.36949414014816284,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4281,
|
|
"step": 2522
|
|
},
|
|
{
|
|
"epoch": 0.6945753500051598,
|
|
"grad_norm": 0.36324694752693176,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4253,
|
|
"step": 2524
|
|
},
|
|
{
|
|
"epoch": 0.6951257266691893,
|
|
"grad_norm": 0.3471947908401489,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4215,
|
|
"step": 2526
|
|
},
|
|
{
|
|
"epoch": 0.6956761033332187,
|
|
"grad_norm": 0.33943814039230347,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4546,
|
|
"step": 2528
|
|
},
|
|
{
|
|
"epoch": 0.6962264799972481,
|
|
"grad_norm": 0.34675729274749756,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4191,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 0.6967768566612775,
|
|
"grad_norm": 0.3519613742828369,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4272,
|
|
"step": 2532
|
|
},
|
|
{
|
|
"epoch": 0.697327233325307,
|
|
"grad_norm": 0.3635639548301697,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4489,
|
|
"step": 2534
|
|
},
|
|
{
|
|
"epoch": 0.6978776099893365,
|
|
"grad_norm": 0.3636915385723114,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4233,
|
|
"step": 2536
|
|
},
|
|
{
|
|
"epoch": 0.6984279866533659,
|
|
"grad_norm": 0.36174023151397705,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.425,
|
|
"step": 2538
|
|
},
|
|
{
|
|
"epoch": 0.6989783633173954,
|
|
"grad_norm": 0.35721176862716675,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4279,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 0.6995287399814248,
|
|
"grad_norm": 0.35394319891929626,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4279,
|
|
"step": 2542
|
|
},
|
|
{
|
|
"epoch": 0.7000791166454542,
|
|
"grad_norm": 0.37505972385406494,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.423,
|
|
"step": 2544
|
|
},
|
|
{
|
|
"epoch": 0.7006294933094837,
|
|
"grad_norm": 0.3504476249217987,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4212,
|
|
"step": 2546
|
|
},
|
|
{
|
|
"epoch": 0.7011798699735131,
|
|
"grad_norm": 0.39700883626937866,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4257,
|
|
"step": 2548
|
|
},
|
|
{
|
|
"epoch": 0.7017302466375426,
|
|
"grad_norm": 0.36360886693000793,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4276,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 0.702280623301572,
|
|
"grad_norm": 0.36123448610305786,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4266,
|
|
"step": 2552
|
|
},
|
|
{
|
|
"epoch": 0.7028309999656015,
|
|
"grad_norm": 0.35183826088905334,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.421,
|
|
"step": 2554
|
|
},
|
|
{
|
|
"epoch": 0.7033813766296309,
|
|
"grad_norm": 0.3557921350002289,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4239,
|
|
"step": 2556
|
|
},
|
|
{
|
|
"epoch": 0.7039317532936603,
|
|
"grad_norm": 0.35415929555892944,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4216,
|
|
"step": 2558
|
|
},
|
|
{
|
|
"epoch": 0.7044821299576898,
|
|
"grad_norm": 0.3662279546260834,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4268,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 0.7050325066217192,
|
|
"grad_norm": 0.35718172788619995,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4213,
|
|
"step": 2562
|
|
},
|
|
{
|
|
"epoch": 0.7055828832857487,
|
|
"grad_norm": 0.3595860004425049,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4398,
|
|
"step": 2564
|
|
},
|
|
{
|
|
"epoch": 0.7061332599497782,
|
|
"grad_norm": 0.3576621413230896,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4263,
|
|
"step": 2566
|
|
},
|
|
{
|
|
"epoch": 0.7066836366138076,
|
|
"grad_norm": 0.3699706792831421,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4331,
|
|
"step": 2568
|
|
},
|
|
{
|
|
"epoch": 0.707234013277837,
|
|
"grad_norm": 0.38423609733581543,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.436,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 0.7077843899418664,
|
|
"grad_norm": 0.3747715651988983,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4335,
|
|
"step": 2572
|
|
},
|
|
{
|
|
"epoch": 0.7083347666058959,
|
|
"grad_norm": 0.3554603159427643,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4236,
|
|
"step": 2574
|
|
},
|
|
{
|
|
"epoch": 0.7088851432699254,
|
|
"grad_norm": 0.35446056723594666,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4235,
|
|
"step": 2576
|
|
},
|
|
{
|
|
"epoch": 0.7094355199339548,
|
|
"grad_norm": 0.3770659267902374,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4344,
|
|
"step": 2578
|
|
},
|
|
{
|
|
"epoch": 0.7099858965979843,
|
|
"grad_norm": 0.35676074028015137,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4241,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 0.7105362732620137,
|
|
"grad_norm": 0.3687559962272644,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4329,
|
|
"step": 2582
|
|
},
|
|
{
|
|
"epoch": 0.7110866499260431,
|
|
"grad_norm": 0.35311195254325867,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4355,
|
|
"step": 2584
|
|
},
|
|
{
|
|
"epoch": 0.7116370265900726,
|
|
"grad_norm": 0.3590395152568817,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4213,
|
|
"step": 2586
|
|
},
|
|
{
|
|
"epoch": 0.712187403254102,
|
|
"grad_norm": 0.3694981336593628,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4344,
|
|
"step": 2588
|
|
},
|
|
{
|
|
"epoch": 0.7127377799181315,
|
|
"grad_norm": 0.3516077399253845,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4202,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 0.7132881565821609,
|
|
"grad_norm": 0.38859254121780396,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4179,
|
|
"step": 2592
|
|
},
|
|
{
|
|
"epoch": 0.7138385332461904,
|
|
"grad_norm": 0.3825247883796692,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4393,
|
|
"step": 2594
|
|
},
|
|
{
|
|
"epoch": 0.7143889099102199,
|
|
"grad_norm": 0.36817750334739685,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4341,
|
|
"step": 2596
|
|
},
|
|
{
|
|
"epoch": 0.7149392865742492,
|
|
"grad_norm": 0.36351174116134644,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4355,
|
|
"step": 2598
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"grad_norm": 0.3494237959384918,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4176,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_merge_loss": 0.3735547661781311,
|
|
"eval_merge_runtime": 599.6483,
|
|
"eval_merge_samples_per_second": 56.286,
|
|
"eval_merge_steps_per_second": 2.346,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_new_aug_datas_filtered.json_loss": 0.4953900873661041,
|
|
"eval_new_aug_datas_filtered.json_runtime": 10.4567,
|
|
"eval_new_aug_datas_filtered.json_samples_per_second": 73.35,
|
|
"eval_new_aug_datas_filtered.json_steps_per_second": 3.06,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_sharegpt_gpt4.json_loss": 0.748174786567688,
|
|
"eval_sharegpt_gpt4.json_runtime": 31.7349,
|
|
"eval_sharegpt_gpt4.json_samples_per_second": 58.642,
|
|
"eval_sharegpt_gpt4.json_steps_per_second": 2.458,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_Table_GPT.json_loss": 0.04870549216866493,
|
|
"eval_Table_GPT.json_runtime": 25.0368,
|
|
"eval_Table_GPT.json_samples_per_second": 83.597,
|
|
"eval_Table_GPT.json_steps_per_second": 3.515,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_gpt_4o_200k.json_loss": 0.775393009185791,
|
|
"eval_gpt_4o_200k.json_runtime": 48.6152,
|
|
"eval_gpt_4o_200k.json_samples_per_second": 129.198,
|
|
"eval_gpt_4o_200k.json_steps_per_second": 5.389,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_multi_turn_datas.json_loss": 0.29874685406684875,
|
|
"eval_multi_turn_datas.json_runtime": 75.9064,
|
|
"eval_multi_turn_datas.json_samples_per_second": 52.723,
|
|
"eval_multi_turn_datas.json_steps_per_second": 2.2,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_table_python_code_datas.json_loss": 0.2535416781902313,
|
|
"eval_table_python_code_datas.json_runtime": 43.2787,
|
|
"eval_table_python_code_datas.json_samples_per_second": 49.886,
|
|
"eval_table_python_code_datas.json_steps_per_second": 2.08,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_tabular_llm_data.json_loss": 0.08522781729698181,
|
|
"eval_tabular_llm_data.json_runtime": 8.609,
|
|
"eval_tabular_llm_data.json_samples_per_second": 28.575,
|
|
"eval_tabular_llm_data.json_steps_per_second": 1.278,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_python_code_critic_21k.json_loss": 0.5531289577484131,
|
|
"eval_python_code_critic_21k.json_runtime": 3.2416,
|
|
"eval_python_code_critic_21k.json_samples_per_second": 184.167,
|
|
"eval_python_code_critic_21k.json_steps_per_second": 7.712,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_all_merge_table_dataset.json_loss": 0.07141314446926117,
|
|
"eval_all_merge_table_dataset.json_runtime": 23.4197,
|
|
"eval_all_merge_table_dataset.json_samples_per_second": 30.402,
|
|
"eval_all_merge_table_dataset.json_steps_per_second": 1.281,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_code_feedback_multi_turn.json_loss": 0.5697857737541199,
|
|
"eval_code_feedback_multi_turn.json_runtime": 32.4913,
|
|
"eval_code_feedback_multi_turn.json_samples_per_second": 67.741,
|
|
"eval_code_feedback_multi_turn.json_steps_per_second": 2.832,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_ultrainteract_sft.json_loss": 0.406777024269104,
|
|
"eval_ultrainteract_sft.json_runtime": 8.6553,
|
|
"eval_ultrainteract_sft.json_samples_per_second": 168.22,
|
|
"eval_ultrainteract_sft.json_steps_per_second": 7.048,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_synthetic_text_to_sql.json_loss": 0.09255770593881607,
|
|
"eval_synthetic_text_to_sql.json_runtime": 0.1264,
|
|
"eval_synthetic_text_to_sql.json_samples_per_second": 268.887,
|
|
"eval_synthetic_text_to_sql.json_steps_per_second": 15.817,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_sft_react_sql_datas.json_loss": 0.6156443953514099,
|
|
"eval_sft_react_sql_datas.json_runtime": 7.8669,
|
|
"eval_sft_react_sql_datas.json_samples_per_second": 39.914,
|
|
"eval_sft_react_sql_datas.json_steps_per_second": 1.78,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_all_merge_code.json_loss": 0.2804557681083679,
|
|
"eval_all_merge_code.json_runtime": 0.3331,
|
|
"eval_all_merge_code.json_samples_per_second": 189.109,
|
|
"eval_all_merge_code.json_steps_per_second": 9.005,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_magpie_datas.json_loss": 0.42615047097206116,
|
|
"eval_magpie_datas.json_runtime": 2.2188,
|
|
"eval_magpie_datas.json_samples_per_second": 77.518,
|
|
"eval_magpie_datas.json_steps_per_second": 3.605,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_train_data_for_qwen.json_loss": 0.005531710106879473,
|
|
"eval_train_data_for_qwen.json_runtime": 0.2446,
|
|
"eval_train_data_for_qwen.json_samples_per_second": 40.888,
|
|
"eval_train_data_for_qwen.json_steps_per_second": 4.089,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_alpaca_cleaned.json_loss": 0.8993179202079773,
|
|
"eval_alpaca_cleaned.json_runtime": 0.1158,
|
|
"eval_alpaca_cleaned.json_samples_per_second": 233.205,
|
|
"eval_alpaca_cleaned.json_steps_per_second": 17.274,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_agent_instruct.json_loss": 0.20902203023433685,
|
|
"eval_agent_instruct.json_runtime": 0.5148,
|
|
"eval_agent_instruct.json_samples_per_second": 93.239,
|
|
"eval_agent_instruct.json_steps_per_second": 3.885,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_MathInstruct.json_loss": 0.20088934898376465,
|
|
"eval_MathInstruct.json_runtime": 0.3521,
|
|
"eval_MathInstruct.json_samples_per_second": 161.889,
|
|
"eval_MathInstruct.json_steps_per_second": 8.52,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_tested_143k_python_alpaca.json_loss": 0.44206199049949646,
|
|
"eval_tested_143k_python_alpaca.json_runtime": 0.3013,
|
|
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.861,
|
|
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.639,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_xlam_function_calling_60k.json_loss": 0.00838589109480381,
|
|
"eval_xlam_function_calling_60k.json_runtime": 0.1004,
|
|
"eval_xlam_function_calling_60k.json_samples_per_second": 229.101,
|
|
"eval_xlam_function_calling_60k.json_steps_per_second": 9.961,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5224987268447876,
|
|
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0517,
|
|
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 309.243,
|
|
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.328,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_alpaca_gpt4_zh.json_loss": 0.9841532111167908,
|
|
"eval_alpaca_gpt4_zh.json_runtime": 0.0501,
|
|
"eval_alpaca_gpt4_zh.json_samples_per_second": 219.503,
|
|
"eval_alpaca_gpt4_zh.json_steps_per_second": 19.955,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 0.7154896632382787,
|
|
"eval_codefeedback_filtered_instruction.json_loss": 0.5787987112998962,
|
|
"eval_codefeedback_filtered_instruction.json_runtime": 0.4863,
|
|
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.126,
|
|
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.056,
|
|
"step": 2600
|
|
},
|
|
{
"epoch": 0.7160400399023081,
"grad_norm": 0.3617021143436432,
"learning_rate": 1e-05,
"loss": 0.4292,
"step": 2602
},
{
"epoch": 0.7165904165663376,
"grad_norm": 0.39201030135154724,
"learning_rate": 1e-05,
"loss": 0.4565,
"step": 2604
},
{
"epoch": 0.7171407932303671,
"grad_norm": 0.3617227077484131,
"learning_rate": 1e-05,
"loss": 0.4279,
"step": 2606
},
{
"epoch": 0.7176911698943965,
"grad_norm": 0.3502630591392517,
"learning_rate": 1e-05,
"loss": 0.4191,
"step": 2608
},
{
"epoch": 0.7182415465584259,
"grad_norm": 0.41853633522987366,
"learning_rate": 1e-05,
"loss": 0.4122,
"step": 2610
},
{
"epoch": 0.7187919232224553,
"grad_norm": 0.35474300384521484,
"learning_rate": 1e-05,
"loss": 0.4225,
"step": 2612
},
{
"epoch": 0.7193422998864848,
"grad_norm": 0.3673190772533417,
"learning_rate": 1e-05,
"loss": 0.4307,
"step": 2614
},
{
"epoch": 0.7198926765505143,
"grad_norm": 0.383365273475647,
"learning_rate": 1e-05,
"loss": 0.4335,
"step": 2616
},
{
"epoch": 0.7204430532145437,
"grad_norm": 0.35813844203948975,
"learning_rate": 1e-05,
"loss": 0.4462,
"step": 2618
},
{
"epoch": 0.7209934298785732,
"grad_norm": 0.7552120685577393,
"learning_rate": 1e-05,
"loss": 0.4209,
"step": 2620
},
{
"epoch": 0.7215438065426026,
"grad_norm": 0.365175724029541,
"learning_rate": 1e-05,
"loss": 0.441,
"step": 2622
},
{
"epoch": 0.722094183206632,
"grad_norm": 0.3450736701488495,
"learning_rate": 1e-05,
"loss": 0.4302,
"step": 2624
},
{
"epoch": 0.7226445598706615,
"grad_norm": 0.34044018387794495,
"learning_rate": 1e-05,
"loss": 0.4265,
"step": 2626
},
{
"epoch": 0.7231949365346909,
"grad_norm": 0.36393091082572937,
"learning_rate": 1e-05,
"loss": 0.4226,
"step": 2628
},
{
"epoch": 0.7237453131987204,
"grad_norm": 0.3462166488170624,
"learning_rate": 1e-05,
"loss": 0.4236,
"step": 2630
},
{
"epoch": 0.7242956898627498,
"grad_norm": 0.4024192988872528,
"learning_rate": 1e-05,
"loss": 0.4377,
"step": 2632
},
{
"epoch": 0.7248460665267793,
"grad_norm": 0.354809045791626,
"learning_rate": 1e-05,
"loss": 0.4245,
"step": 2634
},
{
"epoch": 0.7253964431908088,
"grad_norm": 0.3701523244380951,
"learning_rate": 1e-05,
"loss": 0.438,
"step": 2636
},
{
"epoch": 0.7259468198548381,
"grad_norm": 0.37080636620521545,
"learning_rate": 1e-05,
"loss": 0.4299,
"step": 2638
},
{
"epoch": 0.7264971965188676,
"grad_norm": 0.3205287754535675,
"learning_rate": 1e-05,
"loss": 0.4193,
"step": 2640
},
{
"epoch": 0.727047573182897,
"grad_norm": 0.3642041087150574,
"learning_rate": 1e-05,
"loss": 0.4259,
"step": 2642
},
{
"epoch": 0.7275979498469265,
"grad_norm": 0.34573763608932495,
"learning_rate": 1e-05,
"loss": 0.438,
"step": 2644
},
{
"epoch": 0.728148326510956,
"grad_norm": 0.3501754701137543,
"learning_rate": 1e-05,
"loss": 0.4184,
"step": 2646
},
{
"epoch": 0.7286987031749854,
"grad_norm": 0.35315144062042236,
"learning_rate": 1e-05,
"loss": 0.4236,
"step": 2648
},
{
"epoch": 0.7292490798390149,
"grad_norm": 0.36585912108421326,
"learning_rate": 1e-05,
"loss": 0.4205,
"step": 2650
},
{
"epoch": 0.7297994565030442,
"grad_norm": 0.3684290051460266,
"learning_rate": 1e-05,
"loss": 0.4366,
"step": 2652
},
{
"epoch": 0.7303498331670737,
"grad_norm": 0.3628571927547455,
"learning_rate": 1e-05,
"loss": 0.4205,
"step": 2654
},
{
"epoch": 0.7309002098311032,
"grad_norm": 0.36779502034187317,
"learning_rate": 1e-05,
"loss": 0.4338,
"step": 2656
},
{
"epoch": 0.7314505864951326,
"grad_norm": 0.3522249162197113,
"learning_rate": 1e-05,
"loss": 0.4268,
"step": 2658
},
{
"epoch": 0.7320009631591621,
"grad_norm": 0.3840633034706116,
"learning_rate": 1e-05,
"loss": 0.425,
"step": 2660
},
{
"epoch": 0.7325513398231915,
"grad_norm": 0.3498011529445648,
"learning_rate": 1e-05,
"loss": 0.4269,
"step": 2662
},
{
"epoch": 0.7331017164872209,
"grad_norm": 0.36151036620140076,
"learning_rate": 1e-05,
"loss": 0.4215,
"step": 2664
},
{
"epoch": 0.7336520931512504,
"grad_norm": 0.37008973956108093,
"learning_rate": 1e-05,
"loss": 0.4468,
"step": 2666
},
{
"epoch": 0.7342024698152798,
"grad_norm": 0.3440816104412079,
"learning_rate": 1e-05,
"loss": 0.4349,
"step": 2668
},
{
"epoch": 0.7347528464793093,
"grad_norm": 0.3912747800350189,
"learning_rate": 1e-05,
"loss": 0.4188,
"step": 2670
},
{
"epoch": 0.7353032231433387,
"grad_norm": 0.3472096025943756,
"learning_rate": 1e-05,
"loss": 0.4344,
"step": 2672
},
{
"epoch": 0.7358535998073682,
"grad_norm": 0.3477676510810852,
"learning_rate": 1e-05,
"loss": 0.4226,
"step": 2674
},
{
"epoch": 0.7364039764713977,
"grad_norm": 0.3726285696029663,
"learning_rate": 1e-05,
"loss": 0.4263,
"step": 2676
},
{
"epoch": 0.736954353135427,
"grad_norm": 0.3610732853412628,
"learning_rate": 1e-05,
"loss": 0.4272,
"step": 2678
},
{
"epoch": 0.7375047297994565,
"grad_norm": 0.35711386799812317,
"learning_rate": 1e-05,
"loss": 0.4356,
"step": 2680
},
{
"epoch": 0.7380551064634859,
"grad_norm": 0.36050212383270264,
"learning_rate": 1e-05,
"loss": 0.437,
"step": 2682
},
{
"epoch": 0.7386054831275154,
"grad_norm": 0.33842894434928894,
"learning_rate": 1e-05,
"loss": 0.4136,
"step": 2684
},
{
"epoch": 0.7391558597915449,
"grad_norm": 0.35878267884254456,
"learning_rate": 1e-05,
"loss": 0.4118,
"step": 2686
},
{
"epoch": 0.7397062364555743,
"grad_norm": 0.3504185676574707,
"learning_rate": 1e-05,
"loss": 0.4157,
"step": 2688
},
{
"epoch": 0.7402566131196038,
"grad_norm": 0.35226139426231384,
"learning_rate": 1e-05,
"loss": 0.4194,
"step": 2690
},
{
"epoch": 0.7408069897836331,
"grad_norm": 0.3720513880252838,
"learning_rate": 1e-05,
"loss": 0.4225,
"step": 2692
},
{
"epoch": 0.7413573664476626,
"grad_norm": 0.3444679081439972,
"learning_rate": 1e-05,
"loss": 0.433,
"step": 2694
},
{
"epoch": 0.741907743111692,
"grad_norm": 0.3685862421989441,
"learning_rate": 1e-05,
"loss": 0.4139,
"step": 2696
},
{
"epoch": 0.7424581197757215,
"grad_norm": 0.36269327998161316,
"learning_rate": 1e-05,
"loss": 0.4277,
"step": 2698
},
{
"epoch": 0.743008496439751,
"grad_norm": 0.36458590626716614,
"learning_rate": 1e-05,
"loss": 0.4217,
"step": 2700
},
{
"epoch": 0.7435588731037804,
"grad_norm": 0.3453613221645355,
"learning_rate": 1e-05,
"loss": 0.4174,
"step": 2702
},
{
"epoch": 0.7441092497678099,
"grad_norm": 0.3562467098236084,
"learning_rate": 1e-05,
"loss": 0.4313,
"step": 2704
},
{
"epoch": 0.7446596264318392,
"grad_norm": 0.3774909973144531,
"learning_rate": 1e-05,
"loss": 0.432,
"step": 2706
},
{
"epoch": 0.7452100030958687,
"grad_norm": 0.3668104112148285,
"learning_rate": 1e-05,
"loss": 0.4236,
"step": 2708
},
{
"epoch": 0.7457603797598982,
"grad_norm": 0.38669878244400024,
"learning_rate": 1e-05,
"loss": 0.4432,
"step": 2710
},
{
"epoch": 0.7463107564239276,
"grad_norm": 0.3985156714916229,
"learning_rate": 1e-05,
"loss": 0.4422,
"step": 2712
},
{
"epoch": 0.7468611330879571,
"grad_norm": 0.3647630512714386,
"learning_rate": 1e-05,
"loss": 0.4273,
"step": 2714
},
{
"epoch": 0.7474115097519866,
"grad_norm": 0.37027841806411743,
"learning_rate": 1e-05,
"loss": 0.4166,
"step": 2716
},
{
"epoch": 0.7479618864160159,
"grad_norm": 0.3770820200443268,
"learning_rate": 1e-05,
"loss": 0.4461,
"step": 2718
},
{
"epoch": 0.7485122630800454,
"grad_norm": 0.35209086537361145,
"learning_rate": 1e-05,
"loss": 0.4473,
"step": 2720
},
{
"epoch": 0.7490626397440748,
"grad_norm": 0.38394030928611755,
"learning_rate": 1e-05,
"loss": 0.4353,
"step": 2722
},
{
"epoch": 0.7496130164081043,
"grad_norm": 0.3524518311023712,
"learning_rate": 1e-05,
"loss": 0.4277,
"step": 2724
},
{
"epoch": 0.7501633930721338,
"grad_norm": 0.35822972655296326,
"learning_rate": 1e-05,
"loss": 0.4277,
"step": 2726
},
{
"epoch": 0.7507137697361632,
"grad_norm": 0.3409929573535919,
"learning_rate": 1e-05,
"loss": 0.4172,
"step": 2728
},
{
"epoch": 0.7512641464001927,
"grad_norm": 0.3534572422504425,
"learning_rate": 1e-05,
"loss": 0.431,
"step": 2730
},
{
"epoch": 0.751814523064222,
"grad_norm": 0.3565024733543396,
"learning_rate": 1e-05,
"loss": 0.4297,
"step": 2732
},
{
"epoch": 0.7523648997282515,
"grad_norm": 0.3499157130718231,
"learning_rate": 1e-05,
"loss": 0.4131,
"step": 2734
},
{
"epoch": 0.752915276392281,
"grad_norm": 0.37271568179130554,
"learning_rate": 1e-05,
"loss": 0.4224,
"step": 2736
},
{
"epoch": 0.7534656530563104,
"grad_norm": 0.38281935453414917,
"learning_rate": 1e-05,
"loss": 0.4366,
"step": 2738
},
{
"epoch": 0.7540160297203399,
"grad_norm": 0.35982009768486023,
"learning_rate": 1e-05,
"loss": 0.4384,
"step": 2740
},
{
"epoch": 0.7545664063843693,
"grad_norm": 0.3618968427181244,
"learning_rate": 1e-05,
"loss": 0.4484,
"step": 2742
},
{
"epoch": 0.7551167830483988,
"grad_norm": 0.35112181305885315,
"learning_rate": 1e-05,
"loss": 0.4132,
"step": 2744
},
{
"epoch": 0.7556671597124281,
"grad_norm": 0.35898518562316895,
"learning_rate": 1e-05,
"loss": 0.4234,
"step": 2746
},
{
"epoch": 0.7562175363764576,
"grad_norm": 0.36049455404281616,
"learning_rate": 1e-05,
"loss": 0.4254,
"step": 2748
},
{
"epoch": 0.7567679130404871,
"grad_norm": 0.3698630630970001,
"learning_rate": 1e-05,
"loss": 0.4387,
"step": 2750
},
{
"epoch": 0.7573182897045165,
"grad_norm": 0.36196333169937134,
"learning_rate": 1e-05,
"loss": 0.4242,
"step": 2752
},
{
"epoch": 0.757868666368546,
"grad_norm": 0.3553547263145447,
"learning_rate": 1e-05,
"loss": 0.4332,
"step": 2754
},
{
"epoch": 0.7584190430325755,
"grad_norm": 0.36536121368408203,
"learning_rate": 1e-05,
"loss": 0.4123,
"step": 2756
},
{
"epoch": 0.7589694196966049,
"grad_norm": 0.3394269049167633,
"learning_rate": 1e-05,
"loss": 0.4115,
"step": 2758
},
{
"epoch": 0.7595197963606343,
"grad_norm": 0.35857659578323364,
"learning_rate": 1e-05,
"loss": 0.4174,
"step": 2760
},
{
"epoch": 0.7600701730246637,
"grad_norm": 0.3676673173904419,
"learning_rate": 1e-05,
"loss": 0.4334,
"step": 2762
},
{
"epoch": 0.7606205496886932,
"grad_norm": 0.35949233174324036,
"learning_rate": 1e-05,
"loss": 0.4345,
"step": 2764
},
{
"epoch": 0.7611709263527227,
"grad_norm": 0.368569940328598,
"learning_rate": 1e-05,
"loss": 0.4241,
"step": 2766
},
{
"epoch": 0.7617213030167521,
"grad_norm": 0.37473535537719727,
"learning_rate": 1e-05,
"loss": 0.4454,
"step": 2768
},
{
"epoch": 0.7622716796807816,
"grad_norm": 0.34766483306884766,
"learning_rate": 1e-05,
"loss": 0.4193,
"step": 2770
},
{
"epoch": 0.7628220563448109,
"grad_norm": 0.3594741225242615,
"learning_rate": 1e-05,
"loss": 0.4265,
"step": 2772
},
{
"epoch": 0.7633724330088404,
"grad_norm": 0.35876014828681946,
"learning_rate": 1e-05,
"loss": 0.4401,
"step": 2774
},
{
"epoch": 0.7639228096728699,
"grad_norm": 0.3698675036430359,
"learning_rate": 1e-05,
"loss": 0.4301,
"step": 2776
},
{
"epoch": 0.7644731863368993,
"grad_norm": 0.3890196979045868,
"learning_rate": 1e-05,
"loss": 0.4312,
"step": 2778
},
{
"epoch": 0.7650235630009288,
"grad_norm": 0.3495800793170929,
"learning_rate": 1e-05,
"loss": 0.4235,
"step": 2780
},
{
"epoch": 0.7655739396649582,
"grad_norm": 0.3536211848258972,
"learning_rate": 1e-05,
"loss": 0.4319,
"step": 2782
},
{
"epoch": 0.7661243163289877,
"grad_norm": 0.35744360089302063,
"learning_rate": 1e-05,
"loss": 0.419,
"step": 2784
},
{
"epoch": 0.766674692993017,
"grad_norm": 0.35292670130729675,
"learning_rate": 1e-05,
"loss": 0.4428,
"step": 2786
},
{
"epoch": 0.7672250696570465,
"grad_norm": 0.32827427983283997,
"learning_rate": 1e-05,
"loss": 0.4175,
"step": 2788
},
{
"epoch": 0.767775446321076,
"grad_norm": 0.3385542929172516,
"learning_rate": 1e-05,
"loss": 0.4288,
"step": 2790
},
{
"epoch": 0.7683258229851054,
"grad_norm": 0.3474958539009094,
"learning_rate": 1e-05,
"loss": 0.4424,
"step": 2792
},
{
"epoch": 0.7688761996491349,
"grad_norm": 0.3551865816116333,
"learning_rate": 1e-05,
"loss": 0.4351,
"step": 2794
},
{
"epoch": 0.7694265763131644,
"grad_norm": 0.3616306781768799,
"learning_rate": 1e-05,
"loss": 0.4481,
"step": 2796
},
{
"epoch": 0.7699769529771938,
"grad_norm": 0.36132022738456726,
"learning_rate": 1e-05,
"loss": 0.4128,
"step": 2798
},
{
"epoch": 0.7705273296412232,
"grad_norm": 0.3580198585987091,
"learning_rate": 1e-05,
"loss": 0.4242,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_merge_loss": 0.3696165680885315,
"eval_merge_runtime": 600.0202,
"eval_merge_samples_per_second": 56.251,
"eval_merge_steps_per_second": 2.345,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_new_aug_datas_filtered.json_loss": 0.49126043915748596,
"eval_new_aug_datas_filtered.json_runtime": 10.3252,
"eval_new_aug_datas_filtered.json_samples_per_second": 74.285,
"eval_new_aug_datas_filtered.json_steps_per_second": 3.099,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_sharegpt_gpt4.json_loss": 0.7416729927062988,
"eval_sharegpt_gpt4.json_runtime": 31.6069,
"eval_sharegpt_gpt4.json_samples_per_second": 58.88,
"eval_sharegpt_gpt4.json_steps_per_second": 2.468,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_Table_GPT.json_loss": 0.04911120608448982,
"eval_Table_GPT.json_runtime": 24.9282,
"eval_Table_GPT.json_samples_per_second": 83.961,
"eval_Table_GPT.json_steps_per_second": 3.53,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_gpt_4o_200k.json_loss": 0.7679291367530823,
"eval_gpt_4o_200k.json_runtime": 48.4021,
"eval_gpt_4o_200k.json_samples_per_second": 129.767,
"eval_gpt_4o_200k.json_steps_per_second": 5.413,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_multi_turn_datas.json_loss": 0.2913420498371124,
"eval_multi_turn_datas.json_runtime": 75.4573,
"eval_multi_turn_datas.json_samples_per_second": 53.037,
"eval_multi_turn_datas.json_steps_per_second": 2.213,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_table_python_code_datas.json_loss": 0.25055599212646484,
"eval_table_python_code_datas.json_runtime": 43.009,
"eval_table_python_code_datas.json_samples_per_second": 50.199,
"eval_table_python_code_datas.json_steps_per_second": 2.093,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_tabular_llm_data.json_loss": 0.07946833223104477,
"eval_tabular_llm_data.json_runtime": 8.5236,
"eval_tabular_llm_data.json_samples_per_second": 28.861,
"eval_tabular_llm_data.json_steps_per_second": 1.291,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_python_code_critic_21k.json_loss": 0.5505719184875488,
"eval_python_code_critic_21k.json_runtime": 3.2237,
"eval_python_code_critic_21k.json_samples_per_second": 185.192,
"eval_python_code_critic_21k.json_steps_per_second": 7.755,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_all_merge_table_dataset.json_loss": 0.07032839208841324,
"eval_all_merge_table_dataset.json_runtime": 23.2519,
"eval_all_merge_table_dataset.json_samples_per_second": 30.621,
"eval_all_merge_table_dataset.json_steps_per_second": 1.29,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_code_feedback_multi_turn.json_loss": 0.5668665766716003,
"eval_code_feedback_multi_turn.json_runtime": 32.3765,
"eval_code_feedback_multi_turn.json_samples_per_second": 67.981,
"eval_code_feedback_multi_turn.json_steps_per_second": 2.842,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_ultrainteract_sft.json_loss": 0.405385285615921,
"eval_ultrainteract_sft.json_runtime": 8.6576,
"eval_ultrainteract_sft.json_samples_per_second": 168.176,
"eval_ultrainteract_sft.json_steps_per_second": 7.046,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_synthetic_text_to_sql.json_loss": 0.0894596055150032,
"eval_synthetic_text_to_sql.json_runtime": 0.1263,
"eval_synthetic_text_to_sql.json_samples_per_second": 269.263,
"eval_synthetic_text_to_sql.json_steps_per_second": 15.839,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_sft_react_sql_datas.json_loss": 0.6155156493186951,
"eval_sft_react_sql_datas.json_runtime": 7.8457,
"eval_sft_react_sql_datas.json_samples_per_second": 40.022,
"eval_sft_react_sql_datas.json_steps_per_second": 1.784,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_all_merge_code.json_loss": 0.2757679224014282,
"eval_all_merge_code.json_runtime": 0.3332,
"eval_all_merge_code.json_samples_per_second": 189.076,
"eval_all_merge_code.json_steps_per_second": 9.004,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_magpie_datas.json_loss": 0.42383918166160583,
"eval_magpie_datas.json_runtime": 2.2093,
"eval_magpie_datas.json_samples_per_second": 77.853,
"eval_magpie_datas.json_steps_per_second": 3.621,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_train_data_for_qwen.json_loss": 0.0028582699596881866,
"eval_train_data_for_qwen.json_runtime": 0.244,
"eval_train_data_for_qwen.json_samples_per_second": 40.988,
"eval_train_data_for_qwen.json_steps_per_second": 4.099,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_alpaca_cleaned.json_loss": 0.9000511169433594,
"eval_alpaca_cleaned.json_runtime": 0.1144,
"eval_alpaca_cleaned.json_samples_per_second": 235.991,
"eval_alpaca_cleaned.json_steps_per_second": 17.481,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_agent_instruct.json_loss": 0.21006985008716583,
"eval_agent_instruct.json_runtime": 0.5133,
"eval_agent_instruct.json_samples_per_second": 93.518,
"eval_agent_instruct.json_steps_per_second": 3.897,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_MathInstruct.json_loss": 0.19836944341659546,
"eval_MathInstruct.json_runtime": 0.3623,
"eval_MathInstruct.json_samples_per_second": 157.336,
"eval_MathInstruct.json_steps_per_second": 8.281,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_tested_143k_python_alpaca.json_loss": 0.44593295454978943,
"eval_tested_143k_python_alpaca.json_runtime": 0.303,
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.196,
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.6,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_xlam_function_calling_60k.json_loss": 0.0066245682537555695,
"eval_xlam_function_calling_60k.json_runtime": 0.1016,
"eval_xlam_function_calling_60k.json_samples_per_second": 226.385,
"eval_xlam_function_calling_60k.json_steps_per_second": 9.843,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_alpaca_data_gpt4_chinese.json_loss": 1.5253314971923828,
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.052,
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 307.853,
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.241,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_alpaca_gpt4_zh.json_loss": 0.9524829983711243,
"eval_alpaca_gpt4_zh.json_runtime": 0.0499,
"eval_alpaca_gpt4_zh.json_samples_per_second": 220.602,
"eval_alpaca_gpt4_zh.json_steps_per_second": 20.055,
"step": 2800
},
{
"epoch": 0.7705273296412232,
"eval_codefeedback_filtered_instruction.json_loss": 0.5769651532173157,
"eval_codefeedback_filtered_instruction.json_runtime": 0.4873,
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.047,
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052,
"step": 2800
},
{
"epoch": 0.7710777063052526,
"grad_norm": 0.3490790128707886,
"learning_rate": 1e-05,
"loss": 0.4279,
"step": 2802
},
{
"epoch": 0.7716280829692821,
"grad_norm": 0.39200064539909363,
"learning_rate": 1e-05,
"loss": 0.4419,
"step": 2804
},
{
"epoch": 0.7721784596333116,
"grad_norm": 0.36754128336906433,
"learning_rate": 1e-05,
"loss": 0.4298,
"step": 2806
},
{
"epoch": 0.772728836297341,
"grad_norm": 0.3482655882835388,
"learning_rate": 1e-05,
"loss": 0.4249,
"step": 2808
},
{
"epoch": 0.7732792129613705,
"grad_norm": 0.35949841141700745,
"learning_rate": 1e-05,
"loss": 0.4245,
"step": 2810
},
{
"epoch": 0.7738295896253999,
"grad_norm": 0.3631410598754883,
"learning_rate": 1e-05,
"loss": 0.4221,
"step": 2812
},
{
"epoch": 0.7743799662894293,
"grad_norm": 0.3531825542449951,
"learning_rate": 1e-05,
"loss": 0.415,
"step": 2814
},
{
"epoch": 0.7749303429534588,
"grad_norm": 0.3741169571876526,
"learning_rate": 1e-05,
"loss": 0.421,
"step": 2816
},
{
"epoch": 0.7754807196174882,
"grad_norm": 0.3431030511856079,
"learning_rate": 1e-05,
"loss": 0.4082,
"step": 2818
},
{
"epoch": 0.7760310962815177,
"grad_norm": 0.35572293400764465,
"learning_rate": 1e-05,
"loss": 0.4279,
"step": 2820
},
{
"epoch": 0.7765814729455471,
"grad_norm": 0.33715927600860596,
"learning_rate": 1e-05,
"loss": 0.4217,
"step": 2822
},
{
"epoch": 0.7771318496095766,
"grad_norm": 0.3827720582485199,
"learning_rate": 1e-05,
"loss": 0.4195,
"step": 2824
},
{
"epoch": 0.777682226273606,
"grad_norm": 0.34325775504112244,
"learning_rate": 1e-05,
"loss": 0.4359,
"step": 2826
},
{
"epoch": 0.7782326029376354,
"grad_norm": 0.34917858242988586,
"learning_rate": 1e-05,
"loss": 0.4165,
"step": 2828
},
{
"epoch": 0.7787829796016649,
"grad_norm": 0.3705228865146637,
"learning_rate": 1e-05,
"loss": 0.4234,
"step": 2830
},
{
"epoch": 0.7793333562656943,
"grad_norm": 0.36879298090934753,
"learning_rate": 1e-05,
"loss": 0.4173,
"step": 2832
},
{
"epoch": 0.7798837329297238,
"grad_norm": 0.35160768032073975,
"learning_rate": 1e-05,
"loss": 0.427,
"step": 2834
},
{
"epoch": 0.7804341095937533,
"grad_norm": 0.35639581084251404,
"learning_rate": 1e-05,
"loss": 0.4342,
"step": 2836
},
{
"epoch": 0.7809844862577827,
"grad_norm": 0.3821897804737091,
"learning_rate": 1e-05,
"loss": 0.4143,
"step": 2838
},
{
"epoch": 0.7815348629218121,
"grad_norm": 0.35575130581855774,
"learning_rate": 1e-05,
"loss": 0.4052,
"step": 2840
},
{
"epoch": 0.7820852395858415,
"grad_norm": 0.367026150226593,
"learning_rate": 1e-05,
"loss": 0.4507,
"step": 2842
},
{
"epoch": 0.782635616249871,
"grad_norm": 0.35660848021507263,
"learning_rate": 1e-05,
"loss": 0.4112,
"step": 2844
},
{
"epoch": 0.7831859929139005,
"grad_norm": 0.3623476028442383,
"learning_rate": 1e-05,
"loss": 0.4298,
"step": 2846
},
{
"epoch": 0.7837363695779299,
"grad_norm": 0.36522987484931946,
"learning_rate": 1e-05,
"loss": 0.4197,
"step": 2848
},
{
"epoch": 0.7842867462419594,
"grad_norm": 0.349153608083725,
"learning_rate": 1e-05,
"loss": 0.4179,
"step": 2850
},
{
"epoch": 0.7848371229059888,
"grad_norm": 0.3868444263935089,
"learning_rate": 1e-05,
"loss": 0.4309,
"step": 2852
},
{
"epoch": 0.7853874995700182,
"grad_norm": 0.3388199210166931,
"learning_rate": 1e-05,
"loss": 0.4255,
"step": 2854
},
{
"epoch": 0.7859378762340476,
"grad_norm": 0.3848430812358856,
"learning_rate": 1e-05,
"loss": 0.4248,
"step": 2856
},
{
"epoch": 0.7864882528980771,
"grad_norm": 0.34994250535964966,
"learning_rate": 1e-05,
"loss": 0.43,
"step": 2858
},
{
"epoch": 0.7870386295621066,
"grad_norm": 0.3475828170776367,
"learning_rate": 1e-05,
"loss": 0.4245,
"step": 2860
},
{
"epoch": 0.787589006226136,
"grad_norm": 0.3643713593482971,
"learning_rate": 1e-05,
"loss": 0.4285,
"step": 2862
},
{
"epoch": 0.7881393828901655,
"grad_norm": 0.3819843828678131,
"learning_rate": 1e-05,
"loss": 0.4264,
"step": 2864
},
{
"epoch": 0.7886897595541948,
"grad_norm": 0.3636263608932495,
"learning_rate": 1e-05,
"loss": 0.4354,
"step": 2866
},
{
"epoch": 0.7892401362182243,
"grad_norm": 0.35367467999458313,
"learning_rate": 1e-05,
"loss": 0.4219,
"step": 2868
},
{
"epoch": 0.7897905128822538,
"grad_norm": 0.33511704206466675,
"learning_rate": 1e-05,
"loss": 0.427,
"step": 2870
},
{
"epoch": 0.7903408895462832,
"grad_norm": 0.3727225363254547,
"learning_rate": 1e-05,
"loss": 0.4325,
"step": 2872
},
{
"epoch": 0.7908912662103127,
"grad_norm": 0.35963478684425354,
"learning_rate": 1e-05,
"loss": 0.4331,
"step": 2874
},
{
"epoch": 0.7914416428743422,
"grad_norm": 0.3680688440799713,
"learning_rate": 1e-05,
"loss": 0.426,
"step": 2876
},
{
"epoch": 0.7919920195383716,
"grad_norm": 0.3594858646392822,
"learning_rate": 1e-05,
"loss": 0.4251,
"step": 2878
},
{
"epoch": 0.792542396202401,
"grad_norm": 0.3666832745075226,
"learning_rate": 1e-05,
"loss": 0.4148,
"step": 2880
},
{
"epoch": 0.7930927728664304,
"grad_norm": 0.3594750761985779,
"learning_rate": 1e-05,
"loss": 0.424,
"step": 2882
},
{
"epoch": 0.7936431495304599,
"grad_norm": 0.34796181321144104,
"learning_rate": 1e-05,
"loss": 0.4188,
"step": 2884
},
{
"epoch": 0.7941935261944894,
"grad_norm": 0.3670448958873749,
"learning_rate": 1e-05,
"loss": 0.4184,
"step": 2886
},
{
"epoch": 0.7947439028585188,
"grad_norm": 0.38206908106803894,
"learning_rate": 1e-05,
"loss": 0.4333,
"step": 2888
},
{
"epoch": 0.7952942795225483,
"grad_norm": 0.3671881854534149,
"learning_rate": 1e-05,
"loss": 0.4117,
"step": 2890
},
{
"epoch": 0.7958446561865777,
"grad_norm": 0.33647626638412476,
"learning_rate": 1e-05,
"loss": 0.4098,
"step": 2892
},
{
"epoch": 0.7963950328506071,
"grad_norm": 0.3504905700683594,
"learning_rate": 1e-05,
"loss": 0.4227,
"step": 2894
},
{
"epoch": 0.7969454095146365,
"grad_norm": 0.3571165204048157,
"learning_rate": 1e-05,
"loss": 0.4126,
"step": 2896
},
{
"epoch": 0.797495786178666,
"grad_norm": 0.3529278337955475,
"learning_rate": 1e-05,
"loss": 0.4198,
"step": 2898
},
{
"epoch": 0.7980461628426955,
"grad_norm": 0.3688133656978607,
"learning_rate": 1e-05,
"loss": 0.443,
"step": 2900
},
{
"epoch": 0.7985965395067249,
"grad_norm": 0.37664586305618286,
"learning_rate": 1e-05,
"loss": 0.4345,
"step": 2902
},
{
"epoch": 0.7991469161707544,
"grad_norm": 0.37368759512901306,
"learning_rate": 1e-05,
"loss": 0.4202,
"step": 2904
},
{
"epoch": 0.7996972928347839,
"grad_norm": 0.3880954384803772,
"learning_rate": 1e-05,
"loss": 0.4234,
"step": 2906
},
{
"epoch": 0.8002476694988132,
"grad_norm": 0.34263235330581665,
"learning_rate": 1e-05,
"loss": 0.4177,
"step": 2908
},
{
"epoch": 0.8007980461628427,
"grad_norm": 0.37408214807510376,
"learning_rate": 1e-05,
"loss": 0.4366,
"step": 2910
},
{
"epoch": 0.8013484228268721,
"grad_norm": 0.35213685035705566,
"learning_rate": 1e-05,
"loss": 0.411,
"step": 2912
},
{
"epoch": 0.8018987994909016,
"grad_norm": 0.3545092046260834,
"learning_rate": 1e-05,
"loss": 0.4378,
"step": 2914
},
{
"epoch": 0.802449176154931,
"grad_norm": 0.3618670701980591,
"learning_rate": 1e-05,
"loss": 0.4187,
"step": 2916
},
{
"epoch": 0.8029995528189605,
"grad_norm": 0.3392831087112427,
"learning_rate": 1e-05,
"loss": 0.4305,
"step": 2918
},
{
"epoch": 0.8035499294829899,
"grad_norm": 0.3700800836086273,
"learning_rate": 1e-05,
"loss": 0.4212,
"step": 2920
},
{
"epoch": 0.8041003061470193,
"grad_norm": 0.35381945967674255,
"learning_rate": 1e-05,
"loss": 0.416,
"step": 2922
},
{
"epoch": 0.8046506828110488,
"grad_norm": 0.3526875972747803,
"learning_rate": 1e-05,
"loss": 0.4287,
"step": 2924
},
{
"epoch": 0.8052010594750783,
"grad_norm": 0.3656879663467407,
"learning_rate": 1e-05,
"loss": 0.4196,
"step": 2926
},
{
"epoch": 0.8057514361391077,
"grad_norm": 0.3675120174884796,
"learning_rate": 1e-05,
"loss": 0.419,
"step": 2928
},
{
"epoch": 0.8063018128031372,
"grad_norm": 0.34032610058784485,
"learning_rate": 1e-05,
"loss": 0.4301,
"step": 2930
},
{
"epoch": 0.8068521894671666,
"grad_norm": 0.39022547006607056,
"learning_rate": 1e-05,
"loss": 0.4347,
"step": 2932
},
{
"epoch": 0.807402566131196,
"grad_norm": 0.38301143050193787,
"learning_rate": 1e-05,
"loss": 0.4289,
"step": 2934
},
{
"epoch": 0.8079529427952254,
"grad_norm": 0.34974217414855957,
"learning_rate": 1e-05,
"loss": 0.4233,
"step": 2936
},
{
"epoch": 0.8085033194592549,
"grad_norm": 0.3554193377494812,
"learning_rate": 1e-05,
"loss": 0.4078,
"step": 2938
},
{
"epoch": 0.8090536961232844,
"grad_norm": 0.3496205806732178,
"learning_rate": 1e-05,
"loss": 0.4241,
"step": 2940
},
{
"epoch": 0.8096040727873138,
"grad_norm": 0.3549167513847351,
"learning_rate": 1e-05,
"loss": 0.4281,
"step": 2942
},
{
"epoch": 0.8101544494513433,
"grad_norm": 0.3635149896144867,
"learning_rate": 1e-05,
"loss": 0.4307,
"step": 2944
},
{
"epoch": 0.8107048261153728,
"grad_norm": 0.36100322008132935,
"learning_rate": 1e-05,
"loss": 0.4352,
"step": 2946
},
{
"epoch": 0.8112552027794021,
"grad_norm": 0.36892169713974,
"learning_rate": 1e-05,
"loss": 0.4245,
"step": 2948
},
{
"epoch": 0.8118055794434316,
"grad_norm": 0.34998342394828796,
"learning_rate": 1e-05,
"loss": 0.4214,
"step": 2950
},
{
"epoch": 0.812355956107461,
"grad_norm": 0.36382123827934265,
"learning_rate": 1e-05,
"loss": 0.4342,
"step": 2952
},
{
"epoch": 0.8129063327714905,
"grad_norm": 0.361068457365036,
"learning_rate": 1e-05,
"loss": 0.4198,
"step": 2954
},
{
"epoch": 0.81345670943552,
"grad_norm": 0.36285367608070374,
"learning_rate": 1e-05,
"loss": 0.4297,
"step": 2956
},
{
"epoch": 0.8140070860995494,
"grad_norm": 0.3376438319683075,
"learning_rate": 1e-05,
"loss": 0.3999,
"step": 2958
},
{
"epoch": 0.8145574627635789,
"grad_norm": 0.35821884870529175,
"learning_rate": 1e-05,
"loss": 0.4283,
"step": 2960
},
{
"epoch": 0.8151078394276082,
"grad_norm": 0.37185990810394287,
"learning_rate": 1e-05,
"loss": 0.4221,
"step": 2962
},
{
"epoch": 0.8156582160916377,
"grad_norm": 0.3599165380001068,
"learning_rate": 1e-05,
"loss": 0.4222,
"step": 2964
},
{
"epoch": 0.8162085927556672,
"grad_norm": 0.3599473237991333,
"learning_rate": 1e-05,
"loss": 0.4211,
"step": 2966
},
{
"epoch": 0.8167589694196966,
"grad_norm": 0.3631754219532013,
"learning_rate": 1e-05,
"loss": 0.4273,
"step": 2968
},
{
"epoch": 0.8173093460837261,
"grad_norm": 0.34736868739128113,
"learning_rate": 1e-05,
"loss": 0.4175,
"step": 2970
},
{
"epoch": 0.8178597227477555,
"grad_norm": 0.34098127484321594,
"learning_rate": 1e-05,
"loss": 0.4297,
"step": 2972
},
{
"epoch": 0.8184100994117849,
"grad_norm": 0.3562553822994232,
"learning_rate": 1e-05,
"loss": 0.4342,
"step": 2974
},
{
"epoch": 0.8189604760758143,
"grad_norm": 0.3628046214580536,
"learning_rate": 1e-05,
"loss": 0.4146,
"step": 2976
},
{
"epoch": 0.8195108527398438,
"grad_norm": 0.33993610739707947,
"learning_rate": 1e-05,
"loss": 0.4228,
"step": 2978
},
{
"epoch": 0.8200612294038733,
"grad_norm": 0.35291528701782227,
"learning_rate": 1e-05,
"loss": 0.4179,
"step": 2980
},
{
"epoch": 0.8206116060679027,
"grad_norm": 0.3480774164199829,
"learning_rate": 1e-05,
"loss": 0.4099,
"step": 2982
},
{
"epoch": 0.8211619827319322,
"grad_norm": 0.36476173996925354,
"learning_rate": 1e-05,
"loss": 0.4153,
"step": 2984
},
{
"epoch": 0.8217123593959617,
"grad_norm": 0.3587859869003296,
"learning_rate": 1e-05,
"loss": 0.4334,
"step": 2986
},
{
"epoch": 0.822262736059991,
"grad_norm": 0.38419267535209656,
"learning_rate": 1e-05,
"loss": 0.4357,
"step": 2988
},
{
"epoch": 0.8228131127240205,
"grad_norm": 0.3496173024177551,
"learning_rate": 1e-05,
"loss": 0.4156,
"step": 2990
},
{
"epoch": 0.8233634893880499,
"grad_norm": 0.36481598019599915,
"learning_rate": 1e-05,
"loss": 0.4108,
"step": 2992
},
{
"epoch": 0.8239138660520794,
"grad_norm": 0.36568546295166016,
"learning_rate": 1e-05,
"loss": 0.4329,
"step": 2994
},
{
"epoch": 0.8244642427161089,
"grad_norm": 0.3675042390823364,
"learning_rate": 1e-05,
"loss": 0.4301,
"step": 2996
},
{
"epoch": 0.8250146193801383,
"grad_norm": 0.3355284035205841,
"learning_rate": 1e-05,
"loss": 0.4162,
"step": 2998
},
{
"epoch": 0.8255649960441678,
"grad_norm": 0.34280914068222046,
"learning_rate": 1e-05,
"loss": 0.4168,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_merge_loss": 0.3659045696258545,
"eval_merge_runtime": 599.8368,
"eval_merge_samples_per_second": 56.269,
"eval_merge_steps_per_second": 2.346,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_new_aug_datas_filtered.json_loss": 0.48660770058631897,
"eval_new_aug_datas_filtered.json_runtime": 10.3383,
"eval_new_aug_datas_filtered.json_samples_per_second": 74.19,
"eval_new_aug_datas_filtered.json_steps_per_second": 3.095,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_sharegpt_gpt4.json_loss": 0.7358890175819397,
"eval_sharegpt_gpt4.json_runtime": 31.7081,
"eval_sharegpt_gpt4.json_samples_per_second": 58.692,
"eval_sharegpt_gpt4.json_steps_per_second": 2.46,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_Table_GPT.json_loss": 0.045936468988657,
"eval_Table_GPT.json_runtime": 24.9946,
"eval_Table_GPT.json_samples_per_second": 83.738,
"eval_Table_GPT.json_steps_per_second": 3.521,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_gpt_4o_200k.json_loss": 0.7624426484107971,
"eval_gpt_4o_200k.json_runtime": 48.6264,
"eval_gpt_4o_200k.json_samples_per_second": 129.169,
"eval_gpt_4o_200k.json_steps_per_second": 5.388,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_multi_turn_datas.json_loss": 0.2812780439853668,
"eval_multi_turn_datas.json_runtime": 75.8593,
"eval_multi_turn_datas.json_samples_per_second": 52.756,
"eval_multi_turn_datas.json_steps_per_second": 2.201,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_table_python_code_datas.json_loss": 0.24670127034187317,
"eval_table_python_code_datas.json_runtime": 43.2305,
"eval_table_python_code_datas.json_samples_per_second": 49.942,
"eval_table_python_code_datas.json_steps_per_second": 2.082,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_tabular_llm_data.json_loss": 0.08318436145782471,
"eval_tabular_llm_data.json_runtime": 8.561,
"eval_tabular_llm_data.json_samples_per_second": 28.735,
"eval_tabular_llm_data.json_steps_per_second": 1.285,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_python_code_critic_21k.json_loss": 0.5459744930267334,
"eval_python_code_critic_21k.json_runtime": 3.2232,
"eval_python_code_critic_21k.json_samples_per_second": 185.217,
"eval_python_code_critic_21k.json_steps_per_second": 7.756,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_all_merge_table_dataset.json_loss": 0.07661881297826767,
"eval_all_merge_table_dataset.json_runtime": 23.3773,
"eval_all_merge_table_dataset.json_samples_per_second": 30.457,
"eval_all_merge_table_dataset.json_steps_per_second": 1.283,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_code_feedback_multi_turn.json_loss": 0.5640604496002197,
"eval_code_feedback_multi_turn.json_runtime": 32.4865,
"eval_code_feedback_multi_turn.json_samples_per_second": 67.751,
"eval_code_feedback_multi_turn.json_steps_per_second": 2.832,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_ultrainteract_sft.json_loss": 0.40351128578186035,
"eval_ultrainteract_sft.json_runtime": 8.6435,
"eval_ultrainteract_sft.json_samples_per_second": 168.449,
"eval_ultrainteract_sft.json_steps_per_second": 7.057,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_synthetic_text_to_sql.json_loss": 0.09340357035398483,
"eval_synthetic_text_to_sql.json_runtime": 0.1267,
"eval_synthetic_text_to_sql.json_samples_per_second": 268.437,
"eval_synthetic_text_to_sql.json_steps_per_second": 15.79,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_sft_react_sql_datas.json_loss": 0.614182710647583,
"eval_sft_react_sql_datas.json_runtime": 7.8427,
"eval_sft_react_sql_datas.json_samples_per_second": 40.037,
"eval_sft_react_sql_datas.json_steps_per_second": 1.785,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_all_merge_code.json_loss": 0.2747681736946106,
"eval_all_merge_code.json_runtime": 0.3335,
"eval_all_merge_code.json_samples_per_second": 188.917,
"eval_all_merge_code.json_steps_per_second": 8.996,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_magpie_datas.json_loss": 0.42281365394592285,
"eval_magpie_datas.json_runtime": 2.2171,
"eval_magpie_datas.json_samples_per_second": 77.579,
"eval_magpie_datas.json_steps_per_second": 3.608,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_train_data_for_qwen.json_loss": 0.0027365919668227434,
"eval_train_data_for_qwen.json_runtime": 0.2454,
"eval_train_data_for_qwen.json_samples_per_second": 40.756,
"eval_train_data_for_qwen.json_steps_per_second": 4.076,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_alpaca_cleaned.json_loss": 0.9086716175079346,
"eval_alpaca_cleaned.json_runtime": 0.1143,
"eval_alpaca_cleaned.json_samples_per_second": 236.118,
"eval_alpaca_cleaned.json_steps_per_second": 17.49,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_agent_instruct.json_loss": 0.20960307121276855,
"eval_agent_instruct.json_runtime": 0.5163,
"eval_agent_instruct.json_samples_per_second": 92.971,
"eval_agent_instruct.json_steps_per_second": 3.874,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_MathInstruct.json_loss": 0.20019014179706573,
"eval_MathInstruct.json_runtime": 0.3582,
"eval_MathInstruct.json_samples_per_second": 159.116,
"eval_MathInstruct.json_steps_per_second": 8.375,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_tested_143k_python_alpaca.json_loss": 0.44821104407310486,
"eval_tested_143k_python_alpaca.json_runtime": 0.3022,
"eval_tested_143k_python_alpaca.json_samples_per_second": 112.526,
"eval_tested_143k_python_alpaca.json_steps_per_second": 6.619,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_xlam_function_calling_60k.json_loss": 0.008376230485737324,
"eval_xlam_function_calling_60k.json_runtime": 0.1003,
"eval_xlam_function_calling_60k.json_samples_per_second": 229.41,
"eval_xlam_function_calling_60k.json_steps_per_second": 9.974,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_alpaca_data_gpt4_chinese.json_loss": 1.513078212738037,
"eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516,
"eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.009,
"eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.376,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_alpaca_gpt4_zh.json_loss": 0.9633126258850098,
"eval_alpaca_gpt4_zh.json_runtime": 0.0499,
"eval_alpaca_gpt4_zh.json_samples_per_second": 220.561,
"eval_alpaca_gpt4_zh.json_steps_per_second": 20.051,
"step": 3000
},
{
"epoch": 0.8255649960441678,
"eval_codefeedback_filtered_instruction.json_loss": 0.5788259506225586,
"eval_codefeedback_filtered_instruction.json_runtime": 0.4854,
"eval_codefeedback_filtered_instruction.json_samples_per_second": 41.202,
"eval_codefeedback_filtered_instruction.json_steps_per_second": 2.06,
"step": 3000
},
{
|
|
"epoch": 0.8261153727081971,
|
|
"grad_norm": 0.35386523604393005,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4235,
|
|
"step": 3002
|
|
},
|
|
{
|
|
"epoch": 0.8266657493722266,
|
|
"grad_norm": 0.35325145721435547,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4296,
|
|
"step": 3004
|
|
},
|
|
{
|
|
"epoch": 0.827216126036256,
|
|
"grad_norm": 0.35455331206321716,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.405,
|
|
"step": 3006
|
|
},
|
|
{
|
|
"epoch": 0.8277665027002855,
|
|
"grad_norm": 0.37510380148887634,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4288,
|
|
"step": 3008
|
|
},
|
|
{
|
|
"epoch": 0.828316879364315,
|
|
"grad_norm": 0.356189489364624,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4145,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 0.8288672560283444,
|
|
"grad_norm": 0.36097854375839233,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4247,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 0.8294176326923739,
|
|
"grad_norm": 0.3489934802055359,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.422,
|
|
"step": 3014
|
|
},
|
|
{
|
|
"epoch": 0.8299680093564032,
|
|
"grad_norm": 0.36287152767181396,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4025,
|
|
"step": 3016
|
|
},
|
|
{
|
|
"epoch": 0.8305183860204327,
|
|
"grad_norm": 0.3664880096912384,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4278,
|
|
"step": 3018
|
|
},
|
|
{
|
|
"epoch": 0.8310687626844622,
|
|
"grad_norm": 0.35230088233947754,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4233,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 0.8316191393484916,
|
|
"grad_norm": 0.3595122694969177,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4173,
|
|
"step": 3022
|
|
},
|
|
{
|
|
"epoch": 0.8321695160125211,
|
|
"grad_norm": 0.3618360757827759,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4213,
|
|
"step": 3024
|
|
},
|
|
{
|
|
"epoch": 0.8327198926765506,
|
|
"grad_norm": 0.3699500858783722,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4267,
|
|
"step": 3026
|
|
},
|
|
{
|
|
"epoch": 0.8332702693405799,
|
|
"grad_norm": 0.37343189120292664,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4288,
|
|
"step": 3028
|
|
},
|
|
{
|
|
"epoch": 0.8338206460046094,
|
|
"grad_norm": 0.34580445289611816,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4232,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 0.8343710226686388,
|
|
"grad_norm": 0.3410281836986542,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4272,
|
|
"step": 3032
|
|
},
|
|
{
|
|
"epoch": 0.8349213993326683,
|
|
"grad_norm": 0.37444379925727844,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4264,
|
|
"step": 3034
|
|
},
|
|
{
|
|
"epoch": 0.8354717759966978,
|
|
"grad_norm": 0.359546959400177,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4243,
|
|
"step": 3036
|
|
},
|
|
{
|
|
"epoch": 0.8360221526607272,
|
|
"grad_norm": 0.3611339032649994,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4243,
|
|
"step": 3038
|
|
},
|
|
{
|
|
"epoch": 0.8365725293247567,
|
|
"grad_norm": 0.3678295612335205,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4089,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 0.837122905988786,
|
|
"grad_norm": 0.37094810605049133,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4191,
|
|
"step": 3042
|
|
},
|
|
{
|
|
"epoch": 0.8376732826528155,
|
|
"grad_norm": 0.354481041431427,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4235,
|
|
"step": 3044
|
|
},
|
|
{
|
|
"epoch": 0.838223659316845,
|
|
"grad_norm": 0.3498587906360626,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.431,
|
|
"step": 3046
|
|
},
|
|
{
|
|
"epoch": 0.8387740359808744,
|
|
"grad_norm": 0.35214436054229736,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4132,
|
|
"step": 3048
|
|
},
|
|
{
|
|
"epoch": 0.8393244126449039,
|
|
"grad_norm": 0.35119178891181946,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4161,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 0.8398747893089333,
|
|
"grad_norm": 0.3671429753303528,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4276,
|
|
"step": 3052
|
|
},
|
|
{
|
|
"epoch": 0.8404251659729628,
|
|
"grad_norm": 0.3626399636268616,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.417,
|
|
"step": 3054
|
|
},
|
|
{
|
|
"epoch": 0.8409755426369921,
|
|
"grad_norm": 0.3819148540496826,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4261,
|
|
"step": 3056
|
|
},
|
|
{
|
|
"epoch": 0.8415259193010216,
|
|
"grad_norm": 0.3481554687023163,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4339,
|
|
"step": 3058
|
|
},
|
|
{
|
|
"epoch": 0.8420762959650511,
|
|
"grad_norm": 0.3603340983390808,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.406,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 0.8426266726290805,
|
|
"grad_norm": 0.3565911650657654,
|
|
"learning_rate": 1e-05,
|
|
"loss": 0.4245,
|
|
"step": 3062
|
|
},
|
|
    {
      "epoch": 0.84317704929311,
      "grad_norm": 0.36305105686187744,
      "learning_rate": 1e-05,
      "loss": 0.4255,
      "step": 3064
    },
    {
      "epoch": 0.8437274259571395,
      "grad_norm": 0.33078432083129883,
      "learning_rate": 1e-05,
      "loss": 0.4045,
      "step": 3066
    },
    {
      "epoch": 0.8442778026211689,
      "grad_norm": 0.346562922000885,
      "learning_rate": 1e-05,
      "loss": 0.4279,
      "step": 3068
    },
    {
      "epoch": 0.8448281792851983,
      "grad_norm": 0.36170172691345215,
      "learning_rate": 1e-05,
      "loss": 0.4139,
      "step": 3070
    },
    {
      "epoch": 0.8453785559492277,
      "grad_norm": 0.360568106174469,
      "learning_rate": 1e-05,
      "loss": 0.4276,
      "step": 3072
    },
    {
      "epoch": 0.8459289326132572,
      "grad_norm": 0.38023245334625244,
      "learning_rate": 1e-05,
      "loss": 0.4317,
      "step": 3074
    },
    {
      "epoch": 0.8464793092772867,
      "grad_norm": 0.344732403755188,
      "learning_rate": 1e-05,
      "loss": 0.4109,
      "step": 3076
    },
    {
      "epoch": 0.8470296859413161,
      "grad_norm": 0.35157695412635803,
      "learning_rate": 1e-05,
      "loss": 0.4192,
      "step": 3078
    },
    {
      "epoch": 0.8475800626053456,
      "grad_norm": 0.36455512046813965,
      "learning_rate": 1e-05,
      "loss": 0.4247,
      "step": 3080
    },
    {
      "epoch": 0.8481304392693749,
      "grad_norm": 0.39768150448799133,
      "learning_rate": 1e-05,
      "loss": 0.4383,
      "step": 3082
    },
    {
      "epoch": 0.8486808159334044,
      "grad_norm": 0.38052836060523987,
      "learning_rate": 1e-05,
      "loss": 0.4199,
      "step": 3084
    },
    {
      "epoch": 0.8492311925974338,
      "grad_norm": 0.3625752925872803,
      "learning_rate": 1e-05,
      "loss": 0.4161,
      "step": 3086
    },
    {
      "epoch": 0.8497815692614633,
      "grad_norm": 0.3708571493625641,
      "learning_rate": 1e-05,
      "loss": 0.4402,
      "step": 3088
    },
    {
      "epoch": 0.8503319459254928,
      "grad_norm": 0.3581870496273041,
      "learning_rate": 1e-05,
      "loss": 0.4376,
      "step": 3090
    },
    {
      "epoch": 0.8508823225895222,
      "grad_norm": 0.33589842915534973,
      "learning_rate": 1e-05,
      "loss": 0.4144,
      "step": 3092
    },
    {
      "epoch": 0.8514326992535517,
      "grad_norm": 0.35838133096694946,
      "learning_rate": 1e-05,
      "loss": 0.4128,
      "step": 3094
    },
    {
      "epoch": 0.851983075917581,
      "grad_norm": 0.3660927712917328,
      "learning_rate": 1e-05,
      "loss": 0.42,
      "step": 3096
    },
    {
      "epoch": 0.8525334525816105,
      "grad_norm": 0.3606925904750824,
      "learning_rate": 1e-05,
      "loss": 0.4288,
      "step": 3098
    },
    {
      "epoch": 0.85308382924564,
      "grad_norm": 0.3437570333480835,
      "learning_rate": 1e-05,
      "loss": 0.4213,
      "step": 3100
    },
    {
      "epoch": 0.8536342059096694,
      "grad_norm": 0.35351496934890747,
      "learning_rate": 1e-05,
      "loss": 0.4238,
      "step": 3102
    },
    {
      "epoch": 0.8541845825736989,
      "grad_norm": 0.3595280051231384,
      "learning_rate": 1e-05,
      "loss": 0.4107,
      "step": 3104
    },
    {
      "epoch": 0.8547349592377284,
      "grad_norm": 0.3546600937843323,
      "learning_rate": 1e-05,
      "loss": 0.4105,
      "step": 3106
    },
    {
      "epoch": 0.8552853359017578,
      "grad_norm": 0.3654036819934845,
      "learning_rate": 1e-05,
      "loss": 0.4158,
      "step": 3108
    },
    {
      "epoch": 0.8558357125657872,
      "grad_norm": 0.3742349445819855,
      "learning_rate": 1e-05,
      "loss": 0.4217,
      "step": 3110
    },
    {
      "epoch": 0.8563860892298166,
      "grad_norm": 0.35527029633522034,
      "learning_rate": 1e-05,
      "loss": 0.414,
      "step": 3112
    },
    {
      "epoch": 0.8569364658938461,
      "grad_norm": 0.3408162295818329,
      "learning_rate": 1e-05,
      "loss": 0.4245,
      "step": 3114
    },
    {
      "epoch": 0.8574868425578756,
      "grad_norm": 0.3608722686767578,
      "learning_rate": 1e-05,
      "loss": 0.4346,
      "step": 3116
    },
    {
      "epoch": 0.858037219221905,
      "grad_norm": 0.36163628101348877,
      "learning_rate": 1e-05,
      "loss": 0.422,
      "step": 3118
    },
    {
      "epoch": 0.8585875958859345,
      "grad_norm": 0.35417988896369934,
      "learning_rate": 1e-05,
      "loss": 0.4101,
      "step": 3120
    },
    {
      "epoch": 0.8591379725499639,
      "grad_norm": 0.3626682162284851,
      "learning_rate": 1e-05,
      "loss": 0.4147,
      "step": 3122
    },
    {
      "epoch": 0.8596883492139933,
      "grad_norm": 0.34313321113586426,
      "learning_rate": 1e-05,
      "loss": 0.4215,
      "step": 3124
    },
    {
      "epoch": 0.8602387258780227,
      "grad_norm": 0.3839293122291565,
      "learning_rate": 1e-05,
      "loss": 0.4173,
      "step": 3126
    },
    {
      "epoch": 0.8607891025420522,
      "grad_norm": 0.3548083007335663,
      "learning_rate": 1e-05,
      "loss": 0.4153,
      "step": 3128
    },
    {
      "epoch": 0.8613394792060817,
      "grad_norm": 0.35141652822494507,
      "learning_rate": 1e-05,
      "loss": 0.4066,
      "step": 3130
    },
    {
      "epoch": 0.8618898558701111,
      "grad_norm": 0.3777351975440979,
      "learning_rate": 1e-05,
      "loss": 0.4128,
      "step": 3132
    },
    {
      "epoch": 0.8624402325341406,
      "grad_norm": 0.3580491840839386,
      "learning_rate": 1e-05,
      "loss": 0.4048,
      "step": 3134
    },
    {
      "epoch": 0.86299060919817,
      "grad_norm": 0.373532772064209,
      "learning_rate": 1e-05,
      "loss": 0.4173,
      "step": 3136
    },
    {
      "epoch": 0.8635409858621994,
      "grad_norm": 0.35365086793899536,
      "learning_rate": 1e-05,
      "loss": 0.4076,
      "step": 3138
    },
    {
      "epoch": 0.8640913625262289,
      "grad_norm": 0.3887852728366852,
      "learning_rate": 1e-05,
      "loss": 0.418,
      "step": 3140
    },
    {
      "epoch": 0.8646417391902583,
      "grad_norm": 0.35862478613853455,
      "learning_rate": 1e-05,
      "loss": 0.4234,
      "step": 3142
    },
    {
      "epoch": 0.8651921158542878,
      "grad_norm": 0.3472420275211334,
      "learning_rate": 1e-05,
      "loss": 0.4132,
      "step": 3144
    },
    {
      "epoch": 0.8657424925183173,
      "grad_norm": 0.344862163066864,
      "learning_rate": 1e-05,
      "loss": 0.41,
      "step": 3146
    },
    {
      "epoch": 0.8662928691823467,
      "grad_norm": 0.35329338908195496,
      "learning_rate": 1e-05,
      "loss": 0.4152,
      "step": 3148
    },
    {
      "epoch": 0.8668432458463761,
      "grad_norm": 0.3792724907398224,
      "learning_rate": 1e-05,
      "loss": 0.4307,
      "step": 3150
    },
    {
      "epoch": 0.8673936225104055,
      "grad_norm": 0.3611691892147064,
      "learning_rate": 1e-05,
      "loss": 0.4166,
      "step": 3152
    },
    {
      "epoch": 0.867943999174435,
      "grad_norm": 0.35675716400146484,
      "learning_rate": 1e-05,
      "loss": 0.4309,
      "step": 3154
    },
    {
      "epoch": 0.8684943758384644,
      "grad_norm": 0.37591055035591125,
      "learning_rate": 1e-05,
      "loss": 0.4174,
      "step": 3156
    },
    {
      "epoch": 0.8690447525024939,
      "grad_norm": 0.34695202112197876,
      "learning_rate": 1e-05,
      "loss": 0.4067,
      "step": 3158
    },
    {
      "epoch": 0.8695951291665234,
      "grad_norm": 0.36810246109962463,
      "learning_rate": 1e-05,
      "loss": 0.4236,
      "step": 3160
    },
    {
      "epoch": 0.8701455058305528,
      "grad_norm": 0.3910383880138397,
      "learning_rate": 1e-05,
      "loss": 0.4344,
      "step": 3162
    },
    {
      "epoch": 0.8706958824945822,
      "grad_norm": 0.3465210497379303,
      "learning_rate": 1e-05,
      "loss": 0.4156,
      "step": 3164
    },
    {
      "epoch": 0.8712462591586116,
      "grad_norm": 0.39839833974838257,
      "learning_rate": 1e-05,
      "loss": 0.417,
      "step": 3166
    },
    {
      "epoch": 0.8717966358226411,
      "grad_norm": 0.33419859409332275,
      "learning_rate": 1e-05,
      "loss": 0.4131,
      "step": 3168
    },
    {
      "epoch": 0.8723470124866706,
      "grad_norm": 0.3657875955104828,
      "learning_rate": 1e-05,
      "loss": 0.4243,
      "step": 3170
    },
    {
      "epoch": 0.8728973891507,
      "grad_norm": 0.35600635409355164,
      "learning_rate": 1e-05,
      "loss": 0.4251,
      "step": 3172
    },
    {
      "epoch": 0.8734477658147295,
      "grad_norm": 0.3642902374267578,
      "learning_rate": 1e-05,
      "loss": 0.4334,
      "step": 3174
    },
    {
      "epoch": 0.873998142478759,
      "grad_norm": 0.35452064871788025,
      "learning_rate": 1e-05,
      "loss": 0.4244,
      "step": 3176
    },
    {
      "epoch": 0.8745485191427883,
      "grad_norm": 0.372953861951828,
      "learning_rate": 1e-05,
      "loss": 0.421,
      "step": 3178
    },
    {
      "epoch": 0.8750988958068178,
      "grad_norm": 0.3428981900215149,
      "learning_rate": 1e-05,
      "loss": 0.4172,
      "step": 3180
    },
    {
      "epoch": 0.8756492724708472,
      "grad_norm": 0.36314892768859863,
      "learning_rate": 1e-05,
      "loss": 0.4008,
      "step": 3182
    },
    {
      "epoch": 0.8761996491348767,
      "grad_norm": 0.35167455673217773,
      "learning_rate": 1e-05,
      "loss": 0.4114,
      "step": 3184
    },
    {
      "epoch": 0.8767500257989062,
      "grad_norm": 0.3496149778366089,
      "learning_rate": 1e-05,
      "loss": 0.4053,
      "step": 3186
    },
    {
      "epoch": 0.8773004024629356,
      "grad_norm": 0.351510226726532,
      "learning_rate": 1e-05,
      "loss": 0.4173,
      "step": 3188
    },
    {
      "epoch": 0.877850779126965,
      "grad_norm": 0.35172203183174133,
      "learning_rate": 1e-05,
      "loss": 0.4202,
      "step": 3190
    },
    {
      "epoch": 0.8784011557909944,
      "grad_norm": 0.36200663447380066,
      "learning_rate": 1e-05,
      "loss": 0.3987,
      "step": 3192
    },
    {
      "epoch": 0.8789515324550239,
      "grad_norm": 0.36070528626441956,
      "learning_rate": 1e-05,
      "loss": 0.4202,
      "step": 3194
    },
    {
      "epoch": 0.8795019091190533,
      "grad_norm": 0.37506040930747986,
      "learning_rate": 1e-05,
      "loss": 0.4125,
      "step": 3196
    },
    {
      "epoch": 0.8800522857830828,
      "grad_norm": 0.3433153033256531,
      "learning_rate": 1e-05,
      "loss": 0.4185,
      "step": 3198
    },
    {
      "epoch": 0.8806026624471123,
      "grad_norm": 0.3672421872615814,
      "learning_rate": 1e-05,
      "loss": 0.4227,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_merge_loss": 0.36182981729507446,
      "eval_merge_runtime": 600.3542,
      "eval_merge_samples_per_second": 56.22,
      "eval_merge_steps_per_second": 2.344,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_new_aug_datas_filtered.json_loss": 0.4833287298679352,
      "eval_new_aug_datas_filtered.json_runtime": 10.3758,
      "eval_new_aug_datas_filtered.json_samples_per_second": 73.922,
      "eval_new_aug_datas_filtered.json_steps_per_second": 3.084,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_sharegpt_gpt4.json_loss": 0.7305224537849426,
      "eval_sharegpt_gpt4.json_runtime": 31.7036,
      "eval_sharegpt_gpt4.json_samples_per_second": 58.7,
      "eval_sharegpt_gpt4.json_steps_per_second": 2.46,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_Table_GPT.json_loss": 0.04232589527964592,
      "eval_Table_GPT.json_runtime": 24.994,
      "eval_Table_GPT.json_samples_per_second": 83.74,
      "eval_Table_GPT.json_steps_per_second": 3.521,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_gpt_4o_200k.json_loss": 0.7571491003036499,
      "eval_gpt_4o_200k.json_runtime": 48.5629,
      "eval_gpt_4o_200k.json_samples_per_second": 129.337,
      "eval_gpt_4o_200k.json_steps_per_second": 5.395,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_multi_turn_datas.json_loss": 0.2720319330692291,
      "eval_multi_turn_datas.json_runtime": 75.6646,
      "eval_multi_turn_datas.json_samples_per_second": 52.891,
      "eval_multi_turn_datas.json_steps_per_second": 2.207,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_table_python_code_datas.json_loss": 0.24331320822238922,
      "eval_table_python_code_datas.json_runtime": 43.0385,
      "eval_table_python_code_datas.json_samples_per_second": 50.164,
      "eval_table_python_code_datas.json_steps_per_second": 2.091,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_tabular_llm_data.json_loss": 0.0862693339586258,
      "eval_tabular_llm_data.json_runtime": 8.5454,
      "eval_tabular_llm_data.json_samples_per_second": 28.788,
      "eval_tabular_llm_data.json_steps_per_second": 1.287,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_python_code_critic_21k.json_loss": 0.5425785183906555,
      "eval_python_code_critic_21k.json_runtime": 3.2194,
      "eval_python_code_critic_21k.json_samples_per_second": 185.437,
      "eval_python_code_critic_21k.json_steps_per_second": 7.765,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_all_merge_table_dataset.json_loss": 0.06970688700675964,
      "eval_all_merge_table_dataset.json_runtime": 23.3201,
      "eval_all_merge_table_dataset.json_samples_per_second": 30.532,
      "eval_all_merge_table_dataset.json_steps_per_second": 1.286,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_code_feedback_multi_turn.json_loss": 0.5619133114814758,
      "eval_code_feedback_multi_turn.json_runtime": 32.4257,
      "eval_code_feedback_multi_turn.json_samples_per_second": 67.878,
      "eval_code_feedback_multi_turn.json_steps_per_second": 2.837,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_ultrainteract_sft.json_loss": 0.4016903042793274,
      "eval_ultrainteract_sft.json_runtime": 8.6472,
      "eval_ultrainteract_sft.json_samples_per_second": 168.378,
      "eval_ultrainteract_sft.json_steps_per_second": 7.054,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_synthetic_text_to_sql.json_loss": 0.09171026945114136,
      "eval_synthetic_text_to_sql.json_runtime": 0.1264,
      "eval_synthetic_text_to_sql.json_samples_per_second": 268.914,
      "eval_synthetic_text_to_sql.json_steps_per_second": 15.818,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_sft_react_sql_datas.json_loss": 0.6105172038078308,
      "eval_sft_react_sql_datas.json_runtime": 7.8425,
      "eval_sft_react_sql_datas.json_samples_per_second": 40.038,
      "eval_sft_react_sql_datas.json_steps_per_second": 1.785,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_all_merge_code.json_loss": 0.264506459236145,
      "eval_all_merge_code.json_runtime": 0.3347,
      "eval_all_merge_code.json_samples_per_second": 188.208,
      "eval_all_merge_code.json_steps_per_second": 8.962,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_magpie_datas.json_loss": 0.4236694872379303,
      "eval_magpie_datas.json_runtime": 2.213,
      "eval_magpie_datas.json_samples_per_second": 77.723,
      "eval_magpie_datas.json_steps_per_second": 3.615,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_train_data_for_qwen.json_loss": 0.0027615067083388567,
      "eval_train_data_for_qwen.json_runtime": 0.2435,
      "eval_train_data_for_qwen.json_samples_per_second": 41.06,
      "eval_train_data_for_qwen.json_steps_per_second": 4.106,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_alpaca_cleaned.json_loss": 0.9028782844543457,
      "eval_alpaca_cleaned.json_runtime": 0.1145,
      "eval_alpaca_cleaned.json_samples_per_second": 235.866,
      "eval_alpaca_cleaned.json_steps_per_second": 17.472,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_agent_instruct.json_loss": 0.20418775081634521,
      "eval_agent_instruct.json_runtime": 0.5123,
      "eval_agent_instruct.json_samples_per_second": 93.693,
      "eval_agent_instruct.json_steps_per_second": 3.904,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_MathInstruct.json_loss": 0.2024046629667282,
      "eval_MathInstruct.json_runtime": 0.35,
      "eval_MathInstruct.json_samples_per_second": 162.858,
      "eval_MathInstruct.json_steps_per_second": 8.571,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_tested_143k_python_alpaca.json_loss": 0.4462108612060547,
      "eval_tested_143k_python_alpaca.json_runtime": 0.3037,
      "eval_tested_143k_python_alpaca.json_samples_per_second": 111.97,
      "eval_tested_143k_python_alpaca.json_steps_per_second": 6.586,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_xlam_function_calling_60k.json_loss": 0.008976898156106472,
      "eval_xlam_function_calling_60k.json_runtime": 0.1004,
      "eval_xlam_function_calling_60k.json_samples_per_second": 229.083,
      "eval_xlam_function_calling_60k.json_steps_per_second": 9.96,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_alpaca_data_gpt4_chinese.json_loss": 1.512216567993164,
      "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0511,
      "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 313.242,
      "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.578,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_alpaca_gpt4_zh.json_loss": 0.9716835021972656,
      "eval_alpaca_gpt4_zh.json_runtime": 0.0498,
      "eval_alpaca_gpt4_zh.json_samples_per_second": 220.769,
      "eval_alpaca_gpt4_zh.json_steps_per_second": 20.07,
      "step": 3200
    },
    {
      "epoch": 0.8806026624471123,
      "eval_codefeedback_filtered_instruction.json_loss": 0.5663765072822571,
      "eval_codefeedback_filtered_instruction.json_runtime": 0.4857,
      "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.176,
      "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.059,
      "step": 3200
    }
  ],
  "logging_steps": 2,
  "max_steps": 3633,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4654685450828094e+20,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null
}